diff --git "a/measurement.json" "b/measurement.json" new file mode 100644--- /dev/null +++ "b/measurement.json" @@ -0,0 +1,185758 @@ +{ + "measurement": [ + { + "key": "model.layers.0.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.01709277555346489, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.0169401615858078, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.008080927655100822, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.00804868247359991, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.008013698272407055, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.005084589589387178, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.024825574830174446, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.016826074570417404, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.00803152285516262, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.007992335595190525, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.00855429656803608, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.009733475744724274, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.007982557639479637, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.005706761963665485, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.005066630896180868, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.006114153191447258, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.005059986375272274, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.0048502786085009575, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.0050587947480380535, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.004848626907914877, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.005360267590731382, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.005058311391621828, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.005077185574918985, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.00484774075448513, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.0.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.017924174666404724, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.017682310193777084, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.00851957593113184, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.008459211327135563, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.008399879559874535, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.004059818107634783, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.02198420837521553, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.01752050407230854, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.0084372004494071, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.008357389830052853, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.00861254334449768, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.00921688787639141, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.008340338245034218, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.005109001416712999, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.004014019388705492, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.005090157501399517, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.00399411004036665, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.0035698532592505217, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.003990142606198788, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.0035647430922836065, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.003726732451468706, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.003989023622125387, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.0031739049591124058, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.0035615789238363504, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.0.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.057056527584791183, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.0474255196750164, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.037564486265182495, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.03190585970878601, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.025391940027475357, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.01815146952867508, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.04810655117034912, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.036428119987249374, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.027078961953520775, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.020877903327345848, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.02148480713367462, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.025047753006219864, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.01778152398765087, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.012648796662688255, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.011101748794317245, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.012523031793534756, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.0071775661781430244, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.006496218964457512, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.006395156029611826, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.005497948732227087, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.006431201938539743, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.005903530400246382, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.0038138043601065874, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.004379678051918745, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.0.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.025069214403629303, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.021363666281104088, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.01941162347793579, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.014428761787712574, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.010752790607511997, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.009253857657313347, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.014993753284215927, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.01288621500134468, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.01114431582391262, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.008417910896241665, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.007416543550789356, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.007451856043189764, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.00651900190860033, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.005659425165504217, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.005452646408230066, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.004138348624110222, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.003911835141479969, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.0038272261153906584, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.0036411015316843987, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.0035329079255461693, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.00288303941488266, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.0034547592513263226, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.002689362270757556, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.0031864072661846876, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.0.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.058500729501247406, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.054715346544981, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.053478289395570755, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.048470769077539444, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.026592373847961426, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.02534022182226181, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.029705997556447983, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.02748830057680607, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.026954755187034607, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.024068674072623253, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.022860851138830185, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.015158514492213726, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.013322189450263977, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.012928674928843975, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.012837904505431652, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.007680442184209824, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.007129201199859381, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.007094100117683411, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.006676741875708103, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.006619117222726345, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.004360264632850885, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.004948033485561609, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.004228212870657444, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.003981405403465033, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.0.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.08265147358179092, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.07737711071968079, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.07570714503526688, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.06870528310537338, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.037519436329603195, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.035806890577077866, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.0418463759124279, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.038678959012031555, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.03804038465023041, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.033940039575099945, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.0322447344660759, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.021192152053117752, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.01843978650867939, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.01793469674885273, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.017813943326473236, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.010600284673273563, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.009372420608997345, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.00933266431093216, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.00867576152086258, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.008602526970207691, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.00567114632576704, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.005882485769689083, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.005488700699061155, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.004166372120380402, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.0.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.06113075464963913, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.05186113342642784, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.033881619572639465, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.0310084018856287, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.025574635714292526, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.01684596575796604, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.05043838173151016, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.04492798447608948, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.02722068503499031, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.022335350513458252, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.02323176898062229, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.025748370215296745, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.020147383213043213, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.014454139396548271, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.012211975641548634, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.012881170026957989, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.00979267992079258, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.008625108748674393, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.009382573887705803, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.008034154772758484, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.009100445546209812, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.009124526754021645, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.007408177945762873, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.007490906864404678, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.1.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.015054361894726753, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.01207188330590725, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.00909723062068224, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.008002527989447117, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.006412492133677006, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.004283353686332703, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.011682429350912571, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.009556745178997517, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.00696337316185236, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.005301781464368105, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.005678435787558556, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.006205928046256304, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.004617503844201565, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.0031881253235042095, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.0027398488018661737, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.003199078608304262, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.0018077432177960873, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.001605470199137926, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.0016186852008104324, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.0013484806986525655, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.0017194991232827306, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.0015102438628673553, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.0010738299461081624, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.0010876194573938847, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.1.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.013749486766755581, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.010727744549512863, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.00828280858695507, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.007262433413416147, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.005752455908805132, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.0039557646960020065, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.009423624724149704, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.008278272114694118, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.006329233292490244, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.004642304964363575, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.004718365613371134, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.0047996314242482185, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.003967298660427332, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.002844590926542878, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.0025150005239993334, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.002419555326923728, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.0016080110799521208, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.0014689507661387324, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.0014166731853038073, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.0012138778110966086, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.0013102854136377573, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.0013102866942062974, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.0009106158395297825, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.0009600010816939175, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.1.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.0800551101565361, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.06740349531173706, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.06195298954844475, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.05327775329351425, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.035819485783576965, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.03049854189157486, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.04460785537958145, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.04090667515993118, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.03767811879515648, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.028918107971549034, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.02708440274000168, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.022765126079320908, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.019659385085105896, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.017311597242951393, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.016731513664126396, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.01143272127956152, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.00916137546300888, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.008943997323513031, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.00783192552626133, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.0074421013705432415, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.0060389963909983635, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.006196027155965567, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.005223078653216362, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.004329840652644634, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.1.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.12491796910762787, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.07820632308721542, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.05491204932332039, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.05468059703707695, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.05067034065723419, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.029789792373776436, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.07541316002607346, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.0668729767203331, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.0593835823237896, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.03502636402845383, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.03690676763653755, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.0388508215546608, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.03319575637578964, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.0260832030326128, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.02413068152964115, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.020122677087783813, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.016141997650265694, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.015689294785261154, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.013834438286721706, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.012736677192151546, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.011903909966349602, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.013611442409455776, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.009823027066886425, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.011557871475815773, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.1.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.1049945205450058, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.09994274377822876, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.0984378308057785, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.09048397094011307, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.04698331654071808, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.04552663117647171, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.05189480632543564, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.047937337309122086, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.04740137606859207, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.043619222939014435, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.042044706642627716, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.026398127898573875, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.0229513980448246, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.022553713992238045, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.022450564429163933, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.013288410380482674, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.012116733007133007, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.01208600215613842, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.011507032439112663, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.011455134488642216, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.007445181254297495, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.008024930022656918, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.0073185577057302, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.006206765305250883, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.1.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.1255987137556076, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.11986173689365387, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.11815305799245834, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.10881923884153366, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.05634288489818573, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.05466035380959511, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.06206067278981209, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.05740794539451599, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.05682910233736038, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.05237565562129021, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.05040789768099785, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.031257376074790955, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.027133869007229805, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.026676708832383156, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.02657371759414673, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.01556963101029396, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.013606192544102669, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.013573410920798779, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.012825651094317436, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.012759271077811718, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.008139059878885746, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.00802245456725359, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.007987079210579395, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.00516357459127903, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.1.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.09622859209775925, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.08692048490047455, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.07882006466388702, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.07188577204942703, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.04324815794825554, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.03723656386137009, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.0566582977771759, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.05325848609209061, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.0444946326315403, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.038944266736507416, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.03765278682112694, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.028307460248470306, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.026385502889752388, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.021875927224755287, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.020084423944354057, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.016102632507681847, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.012360915541648865, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.011229459196329117, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.011644978076219559, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.010321884416043758, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.009694493375718594, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.009726721793413162, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.007636482361704111, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.006941691506654024, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.2.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.032990649342536926, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.03044019639492035, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.029197216033935547, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.026182999834418297, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.01491070818156004, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.01382515113800764, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.018950678408145905, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.016226159408688545, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.015182926319539547, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.013281832449138165, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.012960966676473618, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.0096158217638731, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.007786821108311415, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.007171241566538811, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.007014656905084848, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.004850892815738916, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.0037921613547950983, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.0037191957235336304, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.0034828234929591417, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.0033854523207992315, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.0026232704985886812, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.0025074707809835672, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.002291000448167324, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.0017895139753818512, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.2.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.02608063630759716, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.023988332599401474, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.023081164807081223, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.02066715434193611, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.011731100268661976, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.01092792209237814, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.013791298493742943, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.012601405382156372, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.0119626484811306, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.010406878776848316, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.009885860607028008, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.006956399418413639, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.006001552566885948, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.005616083275526762, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.005522029008716345, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.003490089438855648, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.0029318504966795444, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.002892911434173584, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.0026750690303742886, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.0026163789443671703, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.0018530752277001739, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.0018748885486274958, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.00171424460131675, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.0012953946134075522, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.2.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.11014457792043686, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.10292940586805344, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.10063391923904419, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.09121032804250717, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.05049784108996391, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.048062920570373535, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.05640634894371033, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.05214403197169304, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.05126567184925079, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.045490555465221405, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.04314970225095749, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.02856658771634102, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.024804914370179176, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.02406400814652443, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.02388620190322399, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.014247504062950611, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.012247389182448387, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.012191473506391048, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.011227994225919247, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.01111333817243576, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.007354092318564653, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.007230382412672043, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.007079262752085924, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.004532382357865572, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.2.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.14668400585651398, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.11938081681728363, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.10914205759763718, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.08804181963205338, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.06622595340013504, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.05532988905906677, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.08175987750291824, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.07361060380935669, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.06939797848463058, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.04918363690376282, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.04468075558543205, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.0422094352543354, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.03608868271112442, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.03282957524061203, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.03203266113996506, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.021765058860182762, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.0187365859746933, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.018494654446840286, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.01584574021399021, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.015353974886238575, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.012957325205206871, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.013941104523837566, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.01201486773788929, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.011471876874566078, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.2.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.140333354473114, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.1338118463754654, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.13194046914577484, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.12167081981897354, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.06392042338848114, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.06195753812789917, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.07057638466358185, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.06503104418516159, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.06449414789676666, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.05930522084236145, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.05741734802722931, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.0358634777367115, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.031021900475025177, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.03054043836891651, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.030424553900957108, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.017990965396165848, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.0159860011190176, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.015955710783600807, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.015102379024028778, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.015035923570394516, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.009832954965531826, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.010028671473264694, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.009684770368039608, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.0072161429561674595, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.2.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.1668270230293274, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.15919537842273712, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.15688790380954742, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.14496076107025146, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.07618047297000885, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.07385136187076569, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.08398685604333878, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.07749655842781067, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.07686218619346619, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.07070630043745041, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.06828954815864563, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.04245879128575325, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.03678342327475548, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.03620349243283272, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.0360703319311142, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.02120067924261093, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.01848062500357628, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.01844385266304016, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.01739044114947319, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.017306262627243996, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.011162903159856796, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.010906253010034561, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.010978472419083118, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.007043260149657726, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.2.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.17061302065849304, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.15465007722377777, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.1486176997423172, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.1347542107105255, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.07677680999040604, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.07088448107242584, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.08950039744377136, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.08234801888465881, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.07903555035591125, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.06807336211204529, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.0651320219039917, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.04541301727294922, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.03930849954485893, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.036811504513025284, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.03620578721165657, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.022795094177126884, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.0192713662981987, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.019127601757645607, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.017580654472112656, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.017203425988554955, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.012325940653681755, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.01238423865288496, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.011520283296704292, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.00865873135626316, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.3.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.04749276489019394, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.04409316927194595, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.042433708906173706, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.0382302962243557, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.02173636481165886, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.020261717960238457, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.027786757797002792, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.023397624492645264, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.02210170589387417, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.019446387887001038, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.018619775772094727, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.014208123087882996, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.011247443966567516, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.010438141413033009, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.010235200636088848, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.007162180729210377, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.005459212698042393, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.005364926531910896, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.005013341084122658, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.004886220209300518, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.0037994005251675844, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.0035230061039328575, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.003263303777202964, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.0024415128864347935, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.3.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.037772536277770996, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.0350261926651001, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.033751748502254486, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.030338848009705544, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.017224883660674095, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.016123922541737556, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.020213346928358078, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.018473900854587555, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.017534950748085976, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.015377752482891083, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.014667018316686153, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.010229721665382385, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.008812339045107365, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.00827595591545105, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.00814308412373066, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.005129613913595676, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.004342574160546064, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.004286599811166525, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.0039847856387495995, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.0039023233111947775, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.002733435947448015, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.0028001496102660894, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.0025477581657469273, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.0019781168084591627, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.3.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.15852497518062592, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.14836403727531433, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.1451798528432846, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.1315533071756363, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.07326563447713852, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.06975909322500229, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.08236496895551682, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.07556288689374924, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.07431838661432266, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.06599479913711548, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.0627777948975563, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.04172157496213913, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.03599737584590912, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.034954171627759933, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.03470612317323685, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.020839838311076164, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.01779787428677082, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.017721137031912804, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.01630483753979206, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.016144542023539543, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.010796038433909416, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.010492225177586079, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.010400970466434956, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.006567428819835186, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.3.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.1331513673067093, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.11351703107357025, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.10267053544521332, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.08359014987945557, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.060652002692222595, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.05126390978693962, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.08460171520709991, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.07179075479507446, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.06284540146589279, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.04828548803925514, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.04507480561733246, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.04315728694200516, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.03611591458320618, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.031180545687675476, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.029862962663173676, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.023328933864831924, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.019168678671121597, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.01864890567958355, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.01726764254271984, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.016538355499505997, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.014778193086385727, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.015663810074329376, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.013191520236432552, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.013555661775171757, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.3.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.17215046286582947, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.16332930326461792, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.16069133579730988, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.14753036201000214, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.08014983683824539, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.07726268470287323, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.08850374817848206, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.0818093791604042, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.0809868574142456, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.07358346879482269, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.0707569345831871, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.045334767550230026, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.039479680359363556, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.03874645009636879, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.0385703481733799, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.02273123525083065, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.02077019400894642, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.020722785964608192, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.019540121778845787, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.019436407834291458, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.012542102485895157, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.013694060035049915, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.012299712747335434, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.01050338800996542, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.3.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.20224077999591827, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.19201482832431793, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.18891844153404236, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.1735312044620514, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.09392376989126205, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.0905410498380661, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.10369612276554108, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.0958753377199173, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.09492608159780502, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.08622702956199646, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.08280754089355469, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.05273932218551636, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.0457538403570652, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.0449177622795105, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.04469728842377663, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.02637835219502449, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.02317553572356701, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.02312627248466015, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.02164613828063011, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.02152651734650135, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.014012071304023266, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.014073720201849937, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.013725612312555313, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.009572130627930164, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.3.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.04673415794968605, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.04280422627925873, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.022707222029566765, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.02007300965487957, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.012889407575130463, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.011735339649021626, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.04144702106714249, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.03649528697133064, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.013404035940766335, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.010922243818640709, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.0103015610948205, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.008209124207496643, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.007081408519297838, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.006350628565996885, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.0067878607660532, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.004217030946165323, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.004501819144934416, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.0034439198207110167, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.00422452948987484, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.003023639088496566, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.003687645308673382, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.0037725341971963644, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.0021418703254312277, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.0019413841655477881, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.4.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.062029872089624405, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.057070787996053696, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.055066972970962524, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.049369171261787415, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.028342917561531067, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.026465069502592087, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.032681893557310104, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.030126862227916718, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.028920061886310577, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.02507771924138069, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.023754773661494255, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.016550835222005844, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.014359625987708569, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.013574096374213696, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.013383056037127972, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.00827124621719122, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.007026093080639839, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.006952668074518442, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.006375491619110107, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.006255671847611666, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.004327498842030764, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.00438177352771163, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.004062349442392588, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.002943348605185747, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.4.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.053548455238342285, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.04921286180615425, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.047417279332876205, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.04250430315732956, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.024452708661556244, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.022790959104895592, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.02843744307756424, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.026097090914845467, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.02494773268699646, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.021620415151119232, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.020559728145599365, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.014399152249097824, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.01243579015135765, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.011708375066518784, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.011534316465258598, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.007200297899544239, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.006050632335245609, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.005981886759400368, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.005488893482834101, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.005376268178224564, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.003761435393244028, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.003771084826439619, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.0035165203735232353, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.0025090803392231464, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.4.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.16813522577285767, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.15669891238212585, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.1530008167028427, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.1383090764284134, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.0779874324798584, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.0739370733499527, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.08743228018283844, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.08070726692676544, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.07922988384962082, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.06977202743291855, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.0661042183637619, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.04430701956152916, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.03845323994755745, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.03722957521677017, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.036936718970537186, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.02210504747927189, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.018939102068543434, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.018849365413188934, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.017246585339307785, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.01705964095890522, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.011389199644327164, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.011157241649925709, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.010934848338365555, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.0069341217167675495, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.4.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.18016289174556732, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.1566971391439438, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.14711308479309082, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.12337516248226166, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.08396001905202866, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.07444626837968826, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.10135889053344727, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.0923774316906929, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.08702874928712845, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.06756667792797089, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.06174537166953087, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.052746519446372986, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.045074280351400375, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.0411505252122879, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.040179166942834854, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.026664111763238907, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.02251904457807541, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.022182349115610123, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.01951030269265175, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.01888928934931755, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.014980267733335495, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.015856266021728516, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.013738056644797325, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.012269312515854836, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.4.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.18752145767211914, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.1773001253604889, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.17426639795303345, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.15936464071273804, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.08738385885953903, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.08396647870540619, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.0965052917599678, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.08930927515029907, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.08842462301254272, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.07965786755084991, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.07610254734754562, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.049038756638765335, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.042637791484594345, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.041784901171922684, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.041578877717256546, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.02449014037847519, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.02153460681438446, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.021475384011864662, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.019979003816843033, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.019854584708809853, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.012890554033219814, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.013030570931732655, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.012604453600943089, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.008770513348281384, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.4.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.21946783363819122, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.20753686130046844, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.20403099060058594, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.18662098050117493, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.10224636644124985, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.09822191298007965, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.11281586438417435, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.1044572964310646, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.10344550758600235, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.09324080497026443, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.08900509774684906, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.05723718926310539, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.049775078892707825, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.04878560081124306, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.048547279089689255, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.028542397543787956, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.02481118217110634, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.024749983102083206, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.022956697270274162, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.02280571311712265, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.014781332574784756, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.014509415253996849, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.014446378685534, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.009085004217922688, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.4.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.22068296372890472, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.20035627484321594, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.19305580854415894, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.17284588515758514, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.1009889766573906, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.0935516208410263, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.11634854972362518, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.10714886337518692, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.10369102656841278, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.08858808130025864, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.08360087126493454, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.05920357629656792, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.05121907219290733, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.04844135046005249, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.04779055714607239, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.02966219186782837, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.025210699066519737, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.025075498968362808, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.02276609092950821, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.022349948063492775, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.015913916751742363, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.015898512676358223, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.015033666975796223, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.010916491970419884, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.5.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.07254652678966522, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.06671522557735443, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.06429656594991684, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.05756739154458046, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.0332702100276947, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.03097803331911564, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.0384305939078331, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.035542432218790054, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.033951036632061005, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.029387380927801132, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.027752920985221863, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.01948491483926773, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.016975149512290955, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.01596565544605255, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.01572183519601822, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.00975475087761879, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.00831818487495184, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.008222517557442188, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.007556839846074581, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.0074016135185956955, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.005143266171216965, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.005282287951558828, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.004807773511856794, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.0036349683068692684, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.5.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.06187959760427475, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.05686575174331665, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.05466931313276291, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.04893496632575989, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.028370030224323273, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.02636982686817646, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.033211253583431244, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.030488090589642525, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.02895996905863285, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.02505083940923214, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.023805400356650352, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.016832880675792694, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.01454994734376669, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.013615415431559086, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.013390487059950829, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.008424056693911552, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.007066299673169851, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.006972531788051128, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.006409481167793274, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.006264166906476021, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.004427983425557613, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.0044622295536100864, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.0041132434271276, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.0030179074965417385, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.5.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.1790548861026764, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.16663594543933868, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.16259367763996124, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.1466226875782013, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.08325377851724625, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.07880860567092896, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.09356501698493958, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.0862511470913887, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.084633469581604, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.07426513731479645, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.07023316621780396, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.04749641939997673, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.04114973917603493, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.03978440538048744, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.039451733231544495, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.023706592619419098, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.020273400470614433, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.020167088136076927, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.018421240150928497, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.01821458712220192, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.012236220762133598, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.012006665579974651, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.011726407334208488, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.00752490246668458, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.5.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.18652278184890747, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.16308243572711945, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.1552506387233734, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.12969908118247986, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.08702359348535538, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.07796929031610489, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.10116207599639893, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.09259301424026489, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.08965236693620682, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.07048305124044418, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.06309644132852554, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.052306801080703735, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.04518702253699303, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.042650457471609116, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.04203540459275246, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.02650885097682476, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.023376479744911194, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.023209096863865852, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.020343473181128502, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.019961263984441757, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.014904068782925606, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.016259009018540382, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.014132370240986347, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.012812742032110691, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.5.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.176067516207695, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.16600726544857025, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.1629294604063034, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.14861488342285156, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.08231642842292786, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.07886603474617004, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.0911005511879921, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.08428502827882767, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.08337036520242691, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.07471247762441635, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.07122733443975449, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.046437863260507584, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.040358975529670715, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.039482392370700836, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.03927238658070564, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.02320677973330021, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.020519187673926353, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.02045922726392746, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.01900581084191799, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.018878191709518433, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.012340693734586239, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.012687045149505138, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.012054620310664177, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.008827744983136654, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.5.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.2260969877243042, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.21326272189617157, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.20941869914531708, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.19104990363121033, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.1056494265794754, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.10124015808105469, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.11687971651554108, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.10809741914272308, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.10698201507329941, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.0958714634180069, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.09133199602365494, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.059323228895664215, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.05156485363841057, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.050449568778276443, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.0501839704811573, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.029617540538311005, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.02565421164035797, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.025583624839782715, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.023639380931854248, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.023473259061574936, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.01535992044955492, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.01500401459634304, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.014983544126152992, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.009381265379488468, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.5.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.22279061377048492, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.20169968903064728, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.19416074454784393, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.17337925732135773, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.10215133428573608, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.09439512342214584, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.11789199709892273, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.10854312032461166, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.10501328855752945, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.08923377096652985, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.08406166732311249, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.060108624398708344, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.05198409780859947, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.049107879400253296, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.04840739071369171, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.030192971229553223, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.025694245472550392, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.02556655742228031, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.02317618951201439, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.02273656614124775, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.01636618562042713, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.01641020178794861, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.015453187748789787, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.011495949700474739, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.6.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.08905483037233353, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.08268837630748749, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.08021772652864456, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.07228309661149979, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.04122625291347504, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.038818638771772385, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.04693261906504631, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.04336866736412048, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.04192209988832474, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.03676788508892059, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.03480393812060356, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.023836679756641388, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.020749596878886223, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.01979946717619896, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.019570400938391685, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.011927495710551739, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.010303989984095097, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.010215863585472107, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.009418745525181293, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.0092771602794528, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.006294552702456713, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.0064674848690629005, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.005976962391287088, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.004468669183552265, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.6.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.07340514659881592, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.0682239904999733, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.06609223037958145, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.05961817502975464, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.03400793671607971, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.031980883330106735, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.03917799890041351, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.035939186811447144, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.03458324819803238, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.030360933393239975, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.02886858396232128, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.019887354224920273, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.017172738909721375, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.016307301819324493, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.016099845990538597, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.009952066466212273, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.008429058827459812, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.008341371081769466, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.007695098873227835, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.007566310930997133, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.005216316785663366, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.005223175976425409, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.004922214429825544, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.003488657996058464, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.6.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.19785352051258087, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.18486258387565613, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.1807030290365219, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.16347357630729675, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.09234911948442459, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.08767451345920563, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.10330378264188766, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.09534256905317307, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.09372168779373169, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.08273010700941086, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.07828408479690552, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.05243296176195145, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.04547679051756859, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.04410763457417488, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.04377878084778786, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.026158347725868225, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.022412797436118126, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.022307472303509712, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.020429369062185287, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.020219093188643456, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.013459036126732826, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.01316065713763237, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.012952087447047234, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.00814090110361576, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.6.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.1846664696931839, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.15695753693580627, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.1458507925271988, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.1178332194685936, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.08544021099805832, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.07423532009124756, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.10330241918563843, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.0945725366473198, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.0887264683842659, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.06572423130273819, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.05928926542401314, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.0533779114484787, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.04600749909877777, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.04180733114480972, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.040784869343042374, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.02680952288210392, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.02283935621380806, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.022477416321635246, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.01923430897295475, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.018553951755166054, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.014873551204800606, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.016053471714258194, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.013575938530266285, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.012365303002297878, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.6.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.16638405621051788, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.1566946804523468, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.15370233356952667, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.14005520939826965, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.07785061746835709, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.07448309659957886, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.0861300677061081, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.0798143744468689, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.0788780152797699, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.0705077052116394, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.06707629561424255, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.0437961220741272, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.03815232217311859, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.03726760298013687, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.0370592437684536, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.021877050399780273, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.019175488501787186, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.019115615636110306, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.017692219465970993, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.01755945011973381, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.011460630223155022, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.011572890914976597, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.01117317657917738, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.007702739909291267, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.6.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.22736775875091553, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.21420112252235413, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.21020586788654327, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.19164399802684784, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.10641003400087357, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.10186979919672012, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.11769846826791763, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.10900270193815231, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.10781119763851166, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.09641653299331665, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.09173277020454407, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.05978237837553024, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.05201781913638115, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.05084332078695297, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.05056123808026314, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.029840989038348198, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.025863422080874443, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.025788404047489166, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.023799914866685867, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.02361849881708622, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.015467819757759571, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.015158900991082191, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.015065164305269718, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.00948277022689581, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.6.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.22163362801074982, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.19948047399520874, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.19135750830173492, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.17084480822086334, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.1013789251446724, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.09304909408092499, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.1178407371044159, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.1085088700056076, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.10443755984306335, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.08820049464702606, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.083133764564991, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.059976816177368164, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.05198302119970322, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.04877171665430069, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.04799100384116173, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.030099010095000267, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.025586742907762527, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.025426309555768967, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.023020820692181587, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.022533010691404343, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.016226189211010933, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.01648608222603798, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.015206395648419857, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.011609719134867191, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.7.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.08669526875019073, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.08065399527549744, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.07825367152690887, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.07051733881235123, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.0401424914598465, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.03779113292694092, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.045760560780763626, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.0422334149479866, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.0408053882420063, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.035823848098516464, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.033937111496925354, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.023213358595967293, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.020176339894533157, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.019243724644184113, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.01901502162218094, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.01161100436002016, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.009934656322002411, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.009848003275692463, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.009068259969353676, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.008927879855036736, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.006083384621888399, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.006134526338428259, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.005761550739407539, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.004102594684809446, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.7.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.07220778614282608, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.06720919162034988, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.06506142020225525, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.05867276340723038, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.03345682471990585, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.03143105283379555, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.03848642855882645, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.03546221926808357, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.03399895131587982, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.029891934245824814, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.028383782133460045, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.019504616037011147, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.016931500285863876, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.016029298305511475, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.015816980972886086, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.009755893610417843, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.008275606669485569, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.00818557757884264, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.007555865682661533, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.007421397138386965, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.005094490014016628, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.005122808273881674, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.004794593900442123, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.0033944977913051844, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.7.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.1948186755180359, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.18213596940040588, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.17800478637218475, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.16094225645065308, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.09097892045974731, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.0863635390996933, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.10213134437799454, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.0940176248550415, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.09232306480407715, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.08150937408208847, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.077194944024086, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.05189650505781174, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.0448477640748024, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.04345812276005745, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.043118223547935486, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.02588299661874771, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.022091269493103027, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.021985603496432304, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.020135708153247833, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.01992339827120304, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.013326275162398815, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.01297800987958908, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.012779472395777702, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.008033735677599907, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.7.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.20672829449176788, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.177724689245224, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.16607119143009186, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.13902530074119568, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.09435439854860306, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.08314083516597748, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.1156347393989563, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.10541233420372009, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.09895357489585876, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.07722490280866623, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.06928705424070358, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.059539783746004105, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05155429244041443, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.04655435308814049, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.04533113166689873, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.03022611327469349, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.026100216433405876, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.025712022557854652, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.023063914850354195, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.022329553961753845, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.017208006232976913, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.019059723243117332, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.015722399577498436, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.01530697662383318, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.7.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.1645163595676422, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.15492433309555054, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.15193462371826172, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.13832619786262512, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.07717859745025635, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.07380400598049164, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.08558356016874313, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.07922465354204178, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.07819657027721405, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.06982443481683731, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.06642777472734451, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.04355664178729057, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.03792058303952217, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.037000659853219986, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.03678826242685318, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.021790672093629837, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.01912684738636017, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.019067544490098953, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.017650172114372253, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.017511889338493347, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.011506487615406513, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.01168357115238905, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.0112010408192873, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.007939618080854416, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.7.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.2291630506515503, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.21582166850566864, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.21178731322288513, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.1928296834230423, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.10763363540172577, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.1029878482222557, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.11929447948932648, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.1103331446647644, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.10902538150548935, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.09743011742830276, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.0927106961607933, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.06077302619814873, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.05279252305626869, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.05156831815838814, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.051282256841659546, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.030351709574460983, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.026608586311340332, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.026527484878897667, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.024555863812565804, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.02436911128461361, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.015989748761057854, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.0161743201315403, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.015581537038087845, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010906055569648743, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.7.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.21819555759429932, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.19523051381111145, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.18684981763362885, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.16631419956684113, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.09966233372688293, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.09111609309911728, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.11716927587985992, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.10695379972457886, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.10287000983953476, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.08620598912239075, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.08126897364854813, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.05944445729255676, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.051472123712301254, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.0482042133808136, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.04739964008331299, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.030070863664150238, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.025737207382917404, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.025568664073944092, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.023185471072793007, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.02268008328974247, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.016712874174118042, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.01716272532939911, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.01569412276148796, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.012694774195551872, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.8.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.09891710430383682, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.0920797660946846, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.08937805891036987, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.08057205379009247, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.04583200812339783, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.0431964173913002, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.052227798849344254, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.048226773738861084, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.046603672206401825, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.04095422849059105, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.03879210725426674, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.02648547664284706, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.023033570498228073, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.021972062066197395, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.021717814728617668, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.01324660237878561, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.011359957046806812, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.01125817559659481, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.010376392863690853, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.010214119218289852, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.006950160954147577, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.007024083752185106, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.006594175938516855, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.004709627479314804, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.8.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.08055595308542252, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.07504235208034515, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.07268760353326797, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.06553768366575241, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.03735083341598511, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.03512301295995712, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.042875465005636215, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.039583124220371246, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.03795339912176132, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.033382583409547806, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.031712114810943604, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.021753830835223198, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.018904076889157295, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.01791241578757763, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.017670294269919395, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.010876511223614216, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.009245696477591991, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.009146306663751602, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.008449580520391464, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.008297928608953953, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.005696011241525412, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.005726674571633339, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.005376556422561407, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.0038061582017689943, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.8.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.21142077445983887, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.1976364701986313, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.19316068291664124, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.1747439056634903, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.09876251220703125, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.09379652142524719, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.11107545346021652, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.10205809026956558, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.1002281978726387, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.0884796530008316, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.08395911008119583, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.05645569786429405, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.0486922413110733, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.04718644917011261, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.046816859394311905, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.028172338381409645, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.023985480889678, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.023864634335041046, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.021857397630810738, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.021631916984915733, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.014517542906105518, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.014101351611316204, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.013903263956308365, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.00872845109552145, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.8.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.1984063684940338, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.1724107265472412, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.16368845105171204, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.14014996588230133, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.09205842763185501, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.08233273029327393, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.10691031068563461, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.09813105314970016, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.09485593438148499, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.07441993802785873, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.0685599073767662, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.05531776323914528, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.04786323383450508, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.04519360512495041, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.04455607756972313, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.02811959758400917, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.02486495114862919, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.02468823827803135, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.021651679649949074, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.02123863436281681, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.016176648437976837, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.017405102029442787, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.015385597012937069, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.013790124095976353, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.8.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.17043879628181458, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.16044659912586212, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.1573226898908615, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.14326848089694977, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.08014019578695297, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.07662828266620636, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.08891230076551437, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.08227866888046265, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.08121108263731003, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.07251400500535965, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.06893918663263321, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.045354411005973816, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.039493344724178314, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.03852011263370514, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.03828907385468483, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.02268684282898903, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.020069990307092667, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.02000373601913452, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.01855172961950302, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.0184104535728693, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.012069664895534515, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.012497260235249996, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.011752170510590076, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.008775954134762287, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.8.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.230726420879364, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.21732430160045624, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.21316862106323242, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.19424258172512054, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.10824206471443176, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.10353300720453262, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.1200827956199646, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.11105554550886154, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.10970664769411087, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.09793896973133087, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.09313786029815674, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.06100454553961754, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.05301356315612793, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.051758863031864166, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.051459893584251404, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.030483193695545197, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.026357488706707954, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.026272926479578018, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.024232791736721992, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.02403796650469303, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.01584550552070141, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.015486132353544235, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.015413185581564903, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.00972020998597145, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.8.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.22855332493782043, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.20399442315101624, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.19495812058448792, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.17348617315292358, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.10455203056335449, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.09524274617433548, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.12254765629768372, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.11235187947750092, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.10796032845973969, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.09005962312221527, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.08479011803865433, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.06236402317881584, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.05401686206459999, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.05049285665154457, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.04964962229132652, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.03139130026102066, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.026806088164448738, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.026640815660357475, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.024025531485676765, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.02348361536860466, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.01715984381735325, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.017690399661660194, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.01604299619793892, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.012878921814262867, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.9.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.1055462583899498, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.09830518066883087, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.09546705335378647, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.08620452880859375, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.04906850680708885, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.046273164451122284, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.055899396538734436, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.051580291241407394, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.04985035955905914, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.04388013854622841, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.04162614047527313, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.02843499556183815, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.024758504703640938, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.023634828627109528, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.023358633741736412, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.01425836980342865, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.012421149760484695, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.012316294014453888, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.01140845287591219, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.011243153363466263, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.007608063519001007, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.007963841781020164, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.0072368900291621685, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.00569892255589366, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.9.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.08994872123003006, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.08384962379932404, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.081329345703125, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.07348037511110306, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.04180513694882393, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.03939349204301834, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.04788520187139511, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.044164687395095825, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.042478714138269424, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.037414923310279846, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.035578496754169464, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.024317309260368347, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.021105092018842697, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.020048899576067924, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.019793951883912086, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.012165687046945095, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.0103604756295681, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.010260024107992649, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.009477293118834496, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.009320616722106934, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.006381981074810028, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.006421793717890978, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.006030950229614973, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.004300147760659456, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.9.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.20721374452114105, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.19392745196819305, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.18950165808200836, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.17146730422973633, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.09689447283744812, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.09204362332820892, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.10909738391637802, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.10017621517181396, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.09831790626049042, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.0868857279419899, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.08240963518619537, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.05544726550579071, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.04782027378678322, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.04631389304995537, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.04594828188419342, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.02765689045190811, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.023595236241817474, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.023473205044865608, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.021529551595449448, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.021307535469532013, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.014292247593402863, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.013939335942268372, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.013692858628928661, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.008729805238544941, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.9.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.20867909491062164, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.18068860471248627, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.16955487430095673, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.13682565093040466, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.09586639702320099, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.08497462421655655, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.11638551205396652, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.10644600540399551, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.09967584908008575, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.07661467790603638, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.06877310574054718, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06017042696475983, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05225526914000511, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.047515977174043655, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.04633184149861336, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.030624205246567726, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.026822030544281006, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.02642478421330452, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.02346561849117279, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.022740984335541725, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.017676016315817833, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.019734585657715797, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.016266100108623505, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.01604376547038555, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.9.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.17266696691513062, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.16255854070186615, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.15936066210269928, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.1450483649969101, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.08119019865989685, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.07756824046373367, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.09008844196796417, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.08338910341262817, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.08224700391292572, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.07340515404939651, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.06977526843547821, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.04596743360161781, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.04000047221779823, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.03899955749511719, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.038765426725149155, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.022984543815255165, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.020299656316637993, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.020232658833265305, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.018746253103017807, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.018605319783091545, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.012211103923618793, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.012612592428922653, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.011879495345056057, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.008822576142847538, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.9.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.23271438479423523, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.21915777027606964, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.2149985432624817, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.19574731588363647, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.10920315980911255, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.10440421104431152, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.12118634581565857, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.11203432083129883, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.11065290868282318, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.09873112291097641, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.09389843791723251, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.06154713034629822, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.053496260195970535, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.05221625044941902, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.0519072599709034, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.030732035636901855, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.026588503271341324, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.026499439030885696, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.024424903094768524, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.024235133081674576, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.015951042994856834, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.015631135553121567, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.015509619377553463, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.009829694405198097, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.9.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.23560790717601776, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.21089543402194977, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.20168517529964447, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.17911355197429657, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.10806766897439957, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.09856770932674408, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.1262742429971695, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.11611056327819824, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.11150403320789337, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.09318477660417557, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.08760827779769897, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.06458848714828491, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.055926136672496796, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.05225323885679245, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.05137380212545395, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.03252437710762024, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.027853453531861305, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.027662057429552078, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.02501501515507698, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.02445482276380062, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.0178980715572834, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.018529556691646576, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.01673162542283535, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.013653247617185116, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.10.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.11295926570892334, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.10528314113616943, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.10229863226413727, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.09225551784038544, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.05244657024741173, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.04950115829706192, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.05953428894281387, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.055014751851558685, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.05326925218105316, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.04688029736280441, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.044391196221113205, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.030269358307123184, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.02632955089211464, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.025170927867293358, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.024898212403059006, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.01514393836259842, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.01307362038642168, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.01297020260244608, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.011968600563704967, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.01179859135299921, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.007979516871273518, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.008164196275174618, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.0075888666324317455, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005595555063337088, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.10.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.0926959365606308, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.08644603192806244, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.08375227451324463, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.07555487751960754, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.04300282523036003, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.04049260914325714, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.0492919459939003, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.045516595244407654, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.043685246258974075, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.03846912458539009, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.03648729622364044, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.024972757324576378, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.021734822541475296, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.020614754408597946, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.02034074440598488, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.012496559880673885, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.010649725794792175, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.010536598041653633, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.009737628512084484, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.009567519649863243, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.006553054787218571, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.006600316148251295, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.006191269028931856, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.00439761346206069, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.10.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.21063759922981262, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.19689738750457764, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.19212473928928375, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.17366695404052734, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.09836001694202423, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.09330569207668304, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.11069075018167496, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.10207117348909378, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.09984997659921646, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.08813297003507614, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.0833754688501358, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.056222859770059586, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.04873821884393692, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.04700726270675659, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.04659349471330643, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.02804216742515564, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.023969724774360657, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.023820294067263603, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.021862199530005455, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.021601175889372826, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.014490959234535694, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.01423101220279932, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.013867243193089962, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.008948265574872494, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.10.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.22577711939811707, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.19736821949481964, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.18657027184963226, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.15906372666358948, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.10486491024494171, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.09354206174612045, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.12428146600723267, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.11405403912067413, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.10846173763275146, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.0862666666507721, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.07877428084611893, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06425856798887253, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.0558152012526989, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.05162128061056137, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.0506170280277729, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.032595887780189514, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.02868611365556717, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.0283499825745821, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.02532280795276165, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.024706587195396423, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.018546557053923607, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.020508117973804474, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.01732206530869007, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.0163846667855978, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.10.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.17005547881126404, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.15994061529636383, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.15670548379421234, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.14255884289741516, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.07999042421579361, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.07637713849544525, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.08895041048526764, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.08223653584718704, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.08107319474220276, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.07223987579345703, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.0687134638428688, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.04545702785253525, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.03951617702841759, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.038504742085933685, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.038274649530649185, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.022763483226299286, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.020185548812150955, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.020116472616791725, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.018654601648449898, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.018513524904847145, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.012235281057655811, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.012743192724883556, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.011906793341040611, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.009136775508522987, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.10.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.23185306787490845, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.218149334192276, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.21390455961227417, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.19454166293144226, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.10873876512050629, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.10389161109924316, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.1207636222243309, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.11159981042146683, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.11021097749471664, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.0981898158788681, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.09334345906972885, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.06127224117517471, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.05330795422196388, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.05200937017798424, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.05169999599456787, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.03065001219511032, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.02653917670249939, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.026449142023921013, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.024368805810809135, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.024175245314836502, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.015986274927854538, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.015682656317949295, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.015548678115010262, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.009959825314581394, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.10.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.23902159929275513, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.21426552534103394, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.2054419070482254, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.18212001025676727, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.10966143757104874, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.10041137784719467, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.12697969377040863, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.11704769730567932, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.11297277361154556, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.09452987462282181, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.08859897404909134, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.06492356210947037, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.05630127713084221, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.0529605932533741, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.052129678428173065, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.03269204497337341, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.02808079868555069, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.027923567220568657, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.025176910683512688, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.024671847000718117, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.017959758639335632, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.018457990139722824, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.016940593719482422, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.013471146114170551, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.11.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.11233796179294586, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.10472209751605988, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.10178028047084808, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.09173806011676788, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.05221016705036163, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.049236737191677094, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.059362031519412994, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.05485345795750618, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.053008850663900375, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.046664051711559296, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.0442051887512207, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.030145207419991493, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.026222938671708107, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.025026289746165276, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.02473747916519642, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.015078282915055752, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.012928793206810951, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.012815828435122967, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.011813649907708168, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.011635086499154568, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.00790544506162405, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.00797988660633564, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.007498986087739468, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005340341478586197, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.11.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.09369461983442307, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.08745335042476654, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.08473332226276398, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.07645516097545624, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.04356008395552635, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.041000425815582275, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.049878302961587906, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.046125903725624084, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.044238027185201645, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.03896459937095642, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.03694096952676773, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.025297854095697403, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.022023694589734077, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.0208736639469862, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.020591212436556816, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.012648256495594978, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.010756184346973896, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.010635832324624062, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.009827920235693455, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.009649108164012432, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.006614495534449816, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.006623264402151108, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.006239847280085087, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.004357462748885155, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.11.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.2120826095342636, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.1981792002916336, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.19332073628902435, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.17458981275558472, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.09895864129066467, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.09380915015935898, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.11166366934776306, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.10285089910030365, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.10048580914735794, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.08860050141811371, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.08386059105396271, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.05668770149350166, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.0490821935236454, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.047345224767923355, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.0469246432185173, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.028283964842557907, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.02414175495505333, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.02398434840142727, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.022000109776854515, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.021732449531555176, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.01460796594619751, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.014352520927786827, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.013955951668322086, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.009018228389322758, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.11.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.2204628586769104, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.19250933825969696, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.18016982078552246, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.15495699644088745, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.10141861438751221, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.08997943997383118, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.12434054911136627, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.11383209377527237, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.10598688572645187, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.08447165042161942, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.0779038518667221, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06398678570985794, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05523980036377907, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.0496334582567215, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.04824750870466232, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.03223583474755287, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.02706127241253853, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.026566198095679283, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.023915421217679977, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.023046180605888367, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.017761871218681335, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.01906491443514824, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.01599865034222603, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.014536438509821892, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.11.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.17068643867969513, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.16038921475410461, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.1570555865764618, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.14278104901313782, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.08027590066194534, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.07658898830413818, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.0893469899892807, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.0826801210641861, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.08138709515333176, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.0724378153681755, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.06886854767799377, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.04567386209964752, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.0397554449737072, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.038687270134687424, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.03843405470252037, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.02285856194794178, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.020320286974310875, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.02025168389081955, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.018782852217555046, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.018631188198924065, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.012274660170078278, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.012899947352707386, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.01192393247038126, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.00931626372039318, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.11.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.2297973930835724, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.21590666472911835, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.21161818504333496, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.19223907589912415, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.107735775411129, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.10284551978111267, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.11974232643842697, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.11068245023488998, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.10921035706996918, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.097150519490242, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.0922110378742218, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.06079870089888573, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.05286064371466637, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.051525067538022995, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.05121055990457535, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.030343465507030487, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.026264848187565804, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.02617451176047325, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.02409077249467373, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.02388838119804859, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.015718752518296242, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.015496477484703064, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.015254741534590721, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.009801779873669147, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.11.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.24252448976039886, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.21701443195343018, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.20767667889595032, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.18335530161857605, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.11145111918449402, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.10164966434240341, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.1297195553779602, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.11935920268297195, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.11493834108114243, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.09560666978359222, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.08951211720705032, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.06651110202074051, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.05746690183877945, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.053803808987140656, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.05292364954948425, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.03354435786604881, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.028482358902692795, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.028310159221291542, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.025433054193854332, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.024886323139071465, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.01857701689004898, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.018707571551203728, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.017469223588705063, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.013563957996666431, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.12.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.11893948167562485, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.11087044328451157, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.10772785544395447, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.09715956449508667, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.05530301854014397, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.05219649523496628, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.06276644021272659, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.05799548327922821, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.05617089569568634, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.04941880702972412, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.0467536561191082, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.031860437244176865, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.027701446786522865, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.026477456092834473, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.02619151957333088, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.01592446118593216, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.013625772669911385, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.013507898896932602, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.012435855343937874, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.012249909341335297, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.008310193195939064, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.008327065035700798, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.007899666205048561, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005462227389216423, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.12.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.09868128597736359, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.09206409752368927, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.08925578743219376, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.08054256439208984, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.04589693620800972, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.04320690780878067, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.05249340087175369, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.04856615513563156, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.046601079404354095, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.04108298197388649, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.038939908146858215, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.026626644656062126, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.023193951696157455, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.02198243886232376, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.021696852520108223, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.013307273387908936, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.011318319477140903, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.011188960634171963, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.010342588648200035, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.01015702448785305, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.0069497572258114815, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.006954589858651161, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.006564341951161623, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.0045511918142437935, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.12.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.22055867314338684, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.2060488909482956, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.20084483921527863, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.18135450780391693, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.10305130481719971, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.09761031717061996, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.1163678765296936, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.10720765590667725, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.10461953282356262, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.09222934395074844, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.08726539462804794, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.05916209891438484, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.051254499703645706, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.04931928589940071, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.048854272812604904, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.02955061011016369, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.025212477892637253, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.02504080906510353, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.022993188351392746, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.022697562351822853, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.015360115095973015, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.015105519443750381, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.014670025557279587, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.009621178731322289, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.12.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.23015530407428741, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.2043253481388092, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.19444221258163452, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.16863657534122467, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.10756178945302963, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.09715279191732407, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.12655405700206757, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.11569216102361679, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.11043849587440491, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.08943890035152435, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.08336138725280762, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06486789882183075, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05634414404630661, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.05261426791548729, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.05172937735915184, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.03275495767593384, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.02878570184111595, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.028468383476138115, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.025440357625484467, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.024883437901735306, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.018254049122333527, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.02003096602857113, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.017137039452791214, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.01565784215927124, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.12.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.17437337338924408, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.16386832296848297, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.16044139862060547, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.14570221304893494, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.08211649954319, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.07831460982561111, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.09144723415374756, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.0845496729016304, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.08323340862989426, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.07402822375297546, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.0703480988740921, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.046728964895009995, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.040684375911951065, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.039595380425453186, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.03933704271912575, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.023402800783514977, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.02085375227034092, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.020777210593223572, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.019272994250059128, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.019115369766950607, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.012593844905495644, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.01330682635307312, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.012240050360560417, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.009675314649939537, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.12.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.23342309892177582, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.21937529742717743, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.21491701900959015, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.19519145786762238, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.10958966612815857, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.10457058250904083, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.12182324379682541, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.11260172724723816, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.11106616258621216, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.09880529344081879, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.09372616559267044, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.061927326023578644, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.05377072095870972, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.05240113288164139, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.05208344757556915, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.03092728927731514, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.026699988171458244, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.02660192735493183, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.024471325799822807, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.024259096011519432, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.01603858172893524, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.015726011246442795, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.015563413500785828, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.009892106056213379, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.12.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.24836629629135132, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.2221032679080963, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.21290922164916992, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.18796049058437347, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.11409901082515717, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.10429465770721436, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.13212648034095764, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.12150739133358002, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.11762326955795288, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.09783880412578583, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.09146977961063385, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.06773137301206589, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.05849422886967659, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.05508190020918846, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.05427347496151924, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.034202177077531815, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.029226141050457954, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.02910219319164753, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.02610805816948414, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.025601571425795555, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.018978435546159744, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.019207248464226723, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.017942678183317184, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.014043290168046951, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.13.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.12694218754768372, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.11826034635305405, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.11500641703605652, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.10369467735290527, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.05907763913273811, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.055766813457012177, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.06701992452144623, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.061930350959300995, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.05999940633773804, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.05281348153948784, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.05000854656100273, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.034083932638168335, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.029647739604115486, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.028357360512018204, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.028055498376488686, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.017048750072717667, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.014717236161231995, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.014598725363612175, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.013467016629874706, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.013273931108415127, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.008974509313702583, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.009169443510472775, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.008542574942111969, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.00626021483913064, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.13.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.1030387207865715, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.09612258523702621, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.0929538831114769, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.08383214473724365, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.0478656180202961, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.04497215524315834, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.055069588124752045, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.050970401614904404, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.04861599579453468, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.04283132404088974, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.04063766822218895, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.027928415685892105, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.02432345785200596, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.022949479520320892, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.022615907713770866, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.013964158482849598, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.011829652823507786, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.011685997247695923, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.010819967836141586, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.010607993230223656, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.00730167469009757, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.007322506979107857, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.006871248595416546, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.004820448812097311, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.13.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.2295631766319275, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.21458464860916138, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.20933236181735992, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.1890331208705902, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.10734359920024872, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.1017041727900505, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.1210523471236229, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.11147234588861465, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.10894441604614258, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.09610055387020111, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.09098377078771591, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.061538733541965485, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.0532892607152462, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.05136137083172798, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.05089356377720833, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.030737323686480522, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.02626791223883629, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.026096081361174583, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.02396245300769806, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.02366574853658676, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.01595238223671913, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.015726810321211815, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.01523607037961483, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.0100399199873209, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.13.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.23574598133563995, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.2105741947889328, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.20253436267375946, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.17342594265937805, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.11048290878534317, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.10128310322761536, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.12772592902183533, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.11625923961400986, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.11336857825517654, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.09263015538454056, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.08453124761581421, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06616592407226562, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05670996382832527, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.05412088334560394, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.05349867045879364, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.033447518944740295, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.029466912150382996, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.029303591698408127, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.026133371517062187, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.02575855329632759, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.018865592777729034, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.020171543583273888, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.018109295517206192, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.015787553042173386, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.13.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.17138053476810455, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.16101956367492676, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.15764720737934113, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.1431800127029419, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.08077141642570496, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.07699650526046753, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.08987132459878922, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.08316139131784439, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.08188097178936005, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.07279668748378754, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.06915169209241867, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.04596174508333206, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.04002884775400162, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.03895074501633644, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.03869348764419556, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.02301950752735138, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.02048167586326599, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.02040349505841732, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.0189166571944952, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.018761826679110527, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.012376529164612293, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.0130274947732687, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.012029794044792652, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.009433122351765633, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.13.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.2360982894897461, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.22181569039821625, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.21740366518497467, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.19748011231422424, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.1108650416135788, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.10579407960176468, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.1232355386018753, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.1138727143406868, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.1123737320303917, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.09993065148591995, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.09486296027898788, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.06264198571443558, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.05439320579171181, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.053026966750621796, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.05270428955554962, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.03130130097270012, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.027018122375011444, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.02692447043955326, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.024756327271461487, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.024549465626478195, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.016270656138658524, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.015912167727947235, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.01581287942826748, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010015662759542465, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.13.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.24384668469429016, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.21840453147888184, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.20911575853824615, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.18517059087753296, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.11192815005779266, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.10223421454429626, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.13051149249076843, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.11968307942152023, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.11542916297912598, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.0962613970041275, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.0901360809803009, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.06645866483449936, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.05745922029018402, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.053955212235450745, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.05310574918985367, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.0333712212741375, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.02846892923116684, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.02830621413886547, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.0254339799284935, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.024887636303901672, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.018081653863191605, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.018534353002905846, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.016948986798524857, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.01327818725258112, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.14.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.12882764637470245, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.12030428647994995, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.11701446026563644, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.10565502196550369, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.05999501049518585, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.056708838790655136, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.06798966974020004, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.06281948834657669, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.06089932098984718, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.05371873080730438, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.05089261755347252, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.03451742231845856, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.03001810610294342, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.02873564325273037, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.028429633006453514, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.017244376242160797, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.014796523377299309, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.014672279357910156, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.013532955199480057, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.013336852192878723, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.008995123207569122, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.00905139371752739, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.008558924309909344, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005973950494080782, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.14.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.10378113389015198, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.09693614393472672, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.09388644248247147, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.08481809496879578, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.04826454445719719, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.045442208647727966, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.05534132570028305, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.051265813410282135, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.0490046963095665, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.04327546805143356, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.04104696586728096, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.0280221626162529, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.024460168555378914, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.023119637742638588, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.02279437892138958, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.014010411687195301, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.011881565675139427, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.011738527566194534, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.010870756581425667, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.010665240697562695, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.007301875855773687, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.007291717454791069, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.0068789394572377205, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.004736767150461674, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.14.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.2353474348783493, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.22039929032325745, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.21488848328590393, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.19432833790779114, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.11032573878765106, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.10451306402683258, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.12472415715456009, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.11486389487981796, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.11189401149749756, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.09897744655609131, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.09384088218212128, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06339402496814728, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05497520789504051, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.05278610438108444, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.05225980281829834, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.03168272599577904, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.02693232148885727, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.026724325492978096, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.024602407589554787, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.024269171059131622, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.016425471752882004, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.0160699263215065, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.01565140299499035, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.010121719911694527, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.14.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.23667162656784058, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.2149287313222885, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.2080690711736679, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.18367603421211243, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.1106172725558281, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.10239045321941376, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.12604810297489166, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.11574675887823105, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.11309444904327393, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.09493426978588104, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.08866249769926071, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06515268236398697, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05648826062679291, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.054164640605449677, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.0536157488822937, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.03311687335371971, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.029636230319738388, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.029484273865818977, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.02672283723950386, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.026379287242889404, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.01902032643556595, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.02043050155043602, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.01833733730018139, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.01615738868713379, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.14.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.17592953145503998, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.16526323556900024, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.16185595095157623, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.1470188945531845, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.08286195993423462, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.07902557402849197, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.09220308810472488, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.08531589061021805, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.08400658518075943, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.07469707727432251, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.07093057036399841, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.04708646610379219, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.04100504517555237, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.03989093750715256, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.03963547945022583, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.02355090342462063, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.020875921472907066, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.020798886194825172, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.019258974120020866, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.01909828931093216, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.012558341026306152, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.01314388494938612, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.012195506133139133, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.00935816578567028, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.14.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.237359419465065, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.2230842560529709, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.21863119304180145, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.19865776598453522, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.11153300106525421, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.10643290728330612, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.12381970137357712, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.11458636820316315, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.11306825280189514, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.10053375363349915, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.09532768279314041, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.06293852627277374, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.05472865328192711, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.05334518104791641, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.05301019549369812, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.031405258923769, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.02717399224638939, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.02707543410360813, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.024904005229473114, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.024694450199604034, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.016243021935224533, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.015997812151908875, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.015769297257065773, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010058834217488766, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.14.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.24678218364715576, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.22033779323101044, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.210883229970932, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.18648946285247803, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.11293692141771317, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.10305861383676529, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.13135147094726562, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.12093429267406464, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.11667957156896591, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.09696688503026962, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.09080467373132706, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.06702364981174469, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.05798757076263428, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.05433790758252144, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.05345340073108673, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.03372318670153618, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.02845880575478077, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.0282973051071167, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.025326166301965714, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.02476777695119381, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.01827179081737995, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.018283702433109283, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.017125161364674568, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.012791137211024761, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.15.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.12367968261241913, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.11561950296163559, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.11250139027833939, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.10164081305265427, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.05767614394426346, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.05452002212405205, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.06542186439037323, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.06044342368841171, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.05854935199022293, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.05172054097056389, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.04905034974217415, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.03323514759540558, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.02889235131442547, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.027626125141978264, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.02732660248875618, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.01662193238735199, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.014212623238563538, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.014087664894759655, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.013000400736927986, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.012813051231205463, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.00867582205682993, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.008671266958117485, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.008256183937191963, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.00568206375464797, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.15.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.10272746533155441, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.09600922465324402, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.09302322566509247, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.08401532471179962, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.04783421382308006, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.04503919929265976, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.05480092018842697, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.050780054181814194, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.04855183884501457, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.0428987555205822, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.04069714620709419, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.02775520831346512, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.02422904223203659, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.022919584065675735, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.022597163915634155, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.01387757807970047, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.011796482838690281, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.011658851057291031, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.010799331590533257, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.010596951469779015, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.007240708917379379, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.007256541401147842, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.006831125356256962, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.004736901726573706, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.15.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.24035382270812988, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.22516608238220215, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.219781756401062, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.19887062907218933, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.11272566020488739, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.10700168460607529, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.1272021234035492, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.11706069111824036, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.11436972767114639, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.10122199356555939, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.09592928737401962, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06462240219116211, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05599445104598999, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.05394667387008667, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.0534786581993103, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.03230518102645874, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.027549926191568375, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.02736097201704979, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.02517537772655487, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.02486155554652214, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.0167547520250082, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.016418974846601486, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.01600482314825058, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.0103790033608675, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.15.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.22566263377666473, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.20372411608695984, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.19381551444530487, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.1699749380350113, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.10484682768583298, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.09525932371616364, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.1261904239654541, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.11548806726932526, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.10844525694847107, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.09097899496555328, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.08417519181966782, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06502228230237961, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.055908165872097015, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.050964921712875366, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.04975423961877823, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.03272228315472603, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.027225198224186897, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.026762306690216064, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.024540765210986137, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.023790128529071808, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.017808230593800545, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.0183936208486557, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.016222326084971428, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.013456687331199646, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.15.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.1781776249408722, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.16739198565483093, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.16396428644657135, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.1488863080739975, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.08398863673210144, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.08008795976638794, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.0934397429227829, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.08644317090511322, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.08514285832643509, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.07570067048072815, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.07191019505262375, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.04771977290511131, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.04158995673060417, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.04048372060060501, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.040219470858573914, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.023906802758574486, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.02125340700149536, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.021172162145376205, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.01962241530418396, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.019462034106254578, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.012826254591345787, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.013465043157339096, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.012463905848562717, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.00969112478196621, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.15.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.24158242344856262, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.22694995999336243, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.22241227328777313, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.2020636796951294, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.11349815130233765, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.10833191126585007, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.1260833442211151, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.1165834441781044, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.11504604667425156, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.10234761238098145, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.09706931561231613, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.06408405303955078, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.05569043010473251, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.05429050698876381, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.05395736172795296, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.03200872242450714, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.027683869004249573, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.027585504576563835, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.025381438434123993, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.025166332721710205, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.016644559800624847, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.01633390411734581, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.016162188723683357, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010328277945518494, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.15.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.24664406478405, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.22006049752235413, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.21013100445270538, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.18584772944450378, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.11304240673780441, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.10280463844537735, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.1321180760860443, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.12156174331903458, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.11680879443883896, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.09687043726444244, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.09081908315420151, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.06749604642391205, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.0583183690905571, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.054449357092380524, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.0535096675157547, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.03400641307234764, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.028603367507457733, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.028415897861123085, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.02545160986483097, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.024844050407409668, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.0185139998793602, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.018528256565332413, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.017285902053117752, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.013064718805253506, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.16.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.13494393229484558, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.12561343610286713, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.121717169880867, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.10967881977558136, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.06307418644428253, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.05928593873977661, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.07232434302568436, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.06670123338699341, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.06409725546836853, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.05618983134627342, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.05327073112130165, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.03675185889005661, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.03191995993256569, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.03028135560452938, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.029888249933719635, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.018388161435723305, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.015690313652157784, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.015529864467680454, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.0143140759319067, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.014070602133870125, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.009670089930295944, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.009781683795154095, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.009127160534262657, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.006608018651604652, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.16.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.11580751836299896, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.10788027197122574, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.10417652130126953, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.09392394125461578, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.05413424223661423, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.050735387951135635, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.06263556331396103, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.057784806936979294, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.05503159016370773, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.048300206661224365, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.04586398974061012, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.03181133046746254, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.027629297226667404, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.025999674573540688, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.025605184957385063, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.015905944630503654, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.013454539701342583, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.013279109261929989, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.012277083471417427, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.012025847099721432, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.008363769389688969, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.008404092863202095, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.007843571715056896, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005613179411739111, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.16.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.2512502372264862, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.23462235927581787, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.2291160374879837, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.20688137412071228, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.11789773404598236, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.11180230230093002, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.13339824974536896, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.1220589429140091, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.11970622092485428, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.10536305606365204, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.09998953342437744, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06787507236003876, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.058269232511520386, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.05639730766415596, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.05595064163208008, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.03386716544628143, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.028690338134765625, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.028546899557113647, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.026091933250427246, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.025805579498410225, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.01747673563659191, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.01690000481903553, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.01666417345404625, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.010485895909368992, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.16.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.11251281201839447, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.10600962489843369, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.10397784411907196, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.09519519656896591, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.052865367382764816, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.05050921440124512, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.05952939763665199, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.054352983832359314, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.05354364588856697, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.048028577119112015, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.0464843325316906, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.030792659148573875, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.026645582169294357, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.025988155975937843, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.025830509141087532, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.01580372080206871, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.014387136325240135, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.014339250512421131, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.013495937921106815, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.01340157724916935, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.009277132339775562, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.010000289417803288, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.009096927009522915, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.008101812563836575, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.16.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.1764184534549713, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.16565033793449402, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.1621832698583603, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.14725933969020844, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.08330360054969788, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.07940603792667389, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.09290723502635956, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.08572766184806824, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.0844673365354538, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.07498510926961899, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.07130881398916245, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.04756654053926468, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.04120004549622536, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.04010184109210968, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.03984174504876137, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.02377009578049183, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.021002037450671196, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.020926130935549736, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.019360389560461044, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.019201576709747314, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.012801342643797398, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.013251065276563168, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.012448197230696678, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.009459299966692924, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.16.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.2480890154838562, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.23292241990566254, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.22818370163440704, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.20722366869449615, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.11680679768323898, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.11140147596597672, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.13026002049446106, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.1200290322303772, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.11840751022100449, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.10514071583747864, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.09991002827882767, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.06631802767515182, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.057361945509910583, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.055900122970342636, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.055548928678035736, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.03310551866889, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.02859729342162609, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.028496485203504562, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.026211833581328392, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.02598954737186432, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.017399096861481667, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.01702277921140194, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.016904566437005997, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010948830284178257, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.16.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.23712261021137238, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.21686773002147675, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.20969989895820618, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.18749868869781494, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.10983815044164658, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.10240878909826279, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.1263113021850586, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.11579964309930801, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.11243341863155365, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.09666825830936432, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.09093453735113144, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.06437498331069946, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.05563897639513016, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.05294077470898628, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.052288707345724106, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.032328151166439056, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.027905812487006187, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.027775991708040237, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.025353431701660156, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.0249442420899868, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.017635131254792213, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.018023928627371788, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.01677887700498104, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.012970996089279652, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.17.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.11887984722852707, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.11085717380046844, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.1073436439037323, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.09686200320720673, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.05564499273896217, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.05235820636153221, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.06375453621149063, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.05886191502213478, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.05653605982661247, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.0497540682554245, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.047175824642181396, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.032552558928728104, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.028304990381002426, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.026856258511543274, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.026503032073378563, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.016316847875714302, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.014154157601296902, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.014007830992341042, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.012995608150959015, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.0127840805798769, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.008736779913306236, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.009143838658928871, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.008261446841061115, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.006570710800588131, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.17.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.1061476543545723, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.09903404861688614, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.09577640146017075, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.08646462857723236, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.0496378093957901, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.046609748154878616, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.05728473141789436, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.05285896360874176, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.050417594611644745, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.0444057397544384, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.04220806807279587, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.029142551124095917, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.025283275172114372, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.02384471707046032, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.023499390110373497, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.014569548889994621, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.01234347466379404, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.01219151820987463, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.011293010786175728, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.011072433553636074, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.007664886768907309, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.0077132415026426315, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.007200338412076235, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.0051754675805568695, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.17.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.2386952042579651, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.22340188920497894, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.217942014336586, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.1970822662115097, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.11211465299129486, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.10627958923578262, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.1266358494758606, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.11652754992246628, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.11376626044511795, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.1004483550786972, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.09523303806781769, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06444012373685837, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05568411201238632, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.053656235337257385, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.0531604140996933, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.03215567022562027, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.02736072987318039, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.027178920805454254, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.02495153434574604, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.02464188076555729, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.016625763848423958, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.01627087965607643, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.015861695632338524, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.010230105370283127, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.17.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.18344399333000183, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.1641402393579483, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.15757222473621368, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.1376633644104004, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.08570845425128937, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.07850358635187149, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.09943978488445282, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.09093590080738068, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.08789017051458359, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.07282865792512894, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.06741967052221298, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.05129988119006157, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.04448016360402107, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.04213257506489754, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.04157231003046036, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.02597067318856716, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.023260600864887238, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.023093758150935173, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.020893439650535583, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.02055126056075096, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.014742381870746613, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.01634194515645504, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.01404445432126522, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.013049905188381672, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.17.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.17734535038471222, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.16675734519958496, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.16333729028701782, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.14848537743091583, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.08378075808286667, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.07993092387914658, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.09356316924095154, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.08624710142612457, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.08493378758430481, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.07558324187994003, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.07192286103963852, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.04790268838405609, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.041431326419115067, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.04032040014863014, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.04005837067961693, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.023972637951374054, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.021106474101543427, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.021027447655797005, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.019482659175992012, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.01932402141392231, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.012969563715159893, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.013288281857967377, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.01261205319315195, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.009459906257689, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.17.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.24646621942520142, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.2318347841501236, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.22720515727996826, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.20658843219280243, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.11610598117113113, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.11083804816007614, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.12944084405899048, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.11928331106901169, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.11764807254076004, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.10473798215389252, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.09963369369506836, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.06590709090232849, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.05700290948152542, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.055544137954711914, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.05519619211554527, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.03287023678421974, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.028385411947965622, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.028282111510634422, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.026050180196762085, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.0258282832801342, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.01724531129002571, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.0168510340154171, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.016748417168855667, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010767018422484398, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.17.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.2436324805021286, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.22171591222286224, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.21367394924163818, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.19070905447006226, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.11269339919090271, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.10443463176488876, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.13039235770702362, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.11955349892377853, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.11565152555704117, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.09876357018947601, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.09303842484951019, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.06670676916837692, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.057561244815588, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.05444364622235298, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.05370480567216873, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.03356939181685448, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.028963541612029076, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.028815170750021935, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.026294803246855736, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.02583887428045273, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.018557729199528694, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.019090428948402405, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.01756412535905838, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.014058196917176247, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.18.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.11307623982429504, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.10553912818431854, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.10231834650039673, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.0922958180308342, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.05281701311469078, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.04971585422754288, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.0604977160692215, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.0558415949344635, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.05363825336098671, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.04727116972208023, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.044813621789216995, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.03078818880021572, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.02675306610763073, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.02538791298866272, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.02505054511129856, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.015399700030684471, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.013201557099819183, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.013064530678093433, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.01209793146699667, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.011890705674886703, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.008129481226205826, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.008308013901114464, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.0076749445870518684, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005702613387256861, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.18.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.0975584015250206, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.0910494476556778, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.08789614588022232, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.07934365421533585, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.04547274485230446, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.042659275233745575, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.052695609629154205, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.04868683964014053, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.04617398977279663, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.04071889817714691, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.0386601984500885, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.0266939178109169, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.023249153047800064, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.021814748644828796, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.021467342972755432, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.013358763419091702, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.011289095506072044, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.011132472194731236, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.010336308740079403, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.0101172411814332, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.0070310416631400585, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.007065524347126484, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.006579138338565826, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.004712834022939205, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.18.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.23261532187461853, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.2177923619747162, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.21230334043502808, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.19193123281002045, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.10912907868623734, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.10340201109647751, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.12361974269151688, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.11373543739318848, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.11072289943695068, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.09787549823522568, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.09269538521766663, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06285975873470306, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.054348576813936234, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.052206069231033325, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.051716506481170654, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.03135765716433525, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.02667808160185814, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.026487715542316437, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.024358727037906647, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.024034343659877777, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.016257330775260925, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.01595710963010788, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.015473141334950924, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.010098577477037907, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.18.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.21528492867946625, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.18751059472560883, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.1781870722770691, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.1497209519147873, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.0996013656258583, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.08930391073226929, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.11611373722553253, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.10693151503801346, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.10319025069475174, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.08132123947143555, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.07332445681095123, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06014502793550491, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.052444618195295334, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.049136869609355927, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.04833734780550003, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.030524015426635742, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.027324924245476723, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.027098219841718674, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.024022594094276428, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.02353178709745407, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.017417525872588158, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.019481606781482697, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.016431232914328575, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.015689659863710403, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.18.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.17517141997814178, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.16479338705539703, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.16137316823005676, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.1467491090297699, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.08268783241510391, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.07891186326742172, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.09228379279375076, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.08515090495347977, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.08380235731601715, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.07464839518070221, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.07106242328882217, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.047209732234478, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.040938850492239, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.03983034938573837, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.03957142308354378, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.023600183427333832, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.020921599119901657, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.02084003947675228, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.01934586651623249, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.0191885344684124, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.012747159227728844, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.013283166103065014, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.01238771341741085, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.009574837982654572, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.18.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.24141424894332886, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.22713708877563477, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.2226785570383072, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.20264382660388947, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.11356645077466965, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.10849995911121368, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.1264982372522354, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.1166989728808403, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.1150890439748764, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.10255696624517441, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.09752605855464935, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.06431545317173004, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.055720292031764984, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.05430743098258972, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.05397273227572441, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.03208562359213829, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.02772608958184719, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.027625225484371185, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.02546730451285839, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.025246625766158104, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.016799069941043854, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.01641072705388069, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.016316169872879982, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010431873612105846, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.18.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.24621394276618958, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.22260738909244537, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.2140376716852188, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.19035355746746063, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.11354759335517883, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.10465739667415619, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.1314077228307724, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.12092256546020508, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.11677192151546478, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.09876707196235657, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.09292955696582794, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.0672997236251831, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.05818777531385422, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.05487120896577835, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.05407159402966499, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.033885397017002106, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.02918970212340355, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.029028892517089844, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.026362720876932144, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.02585923857986927, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.018787115812301636, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.019274434074759483, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.01777263730764389, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.014174499548971653, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.19.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.108097605407238, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.10096198320388794, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.09776834398508072, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.0882815271615982, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.05043385550379753, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.047438934445381165, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.05794161558151245, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.053494248539209366, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.051178477704524994, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.04515358433127403, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.04288904741406441, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.029445059597492218, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.02557864971458912, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.02419090084731579, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.02386200800538063, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.014725197106599808, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.012530568987131119, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.012387682683765888, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.011476147919893265, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.011267968453466892, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.007740437053143978, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.007820709608495235, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.007274885196238756, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005266651976853609, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.19.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.0937582403421402, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.08762522041797638, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.08454855531454086, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.07637882232666016, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.04371063783764839, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.04097453132271767, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.050695184618234634, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.046886835247278214, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.044369712471961975, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.03920020908117294, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.037265993654727936, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.025717098265886307, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.022397371008992195, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.020962407812476158, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.020619511604309082, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.012857540510594845, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.010837668552994728, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.010678666643798351, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.009927130304276943, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.0097079798579216, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.0067540304735302925, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.006774423643946648, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.006306107621639967, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.004484249744564295, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.19.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.2266296148300171, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.212211474776268, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.20685553550720215, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.18712732195854187, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.1062578558921814, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.10064247995615005, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.12069407850503922, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.11084204912185669, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.10779647529125214, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.09532982856035233, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.09037256240844727, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06129804998636246, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.052948180586099625, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.050844356417655945, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.05033733695745468, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.030592184513807297, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.025993183255195618, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.025792650878429413, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.023748978972434998, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.023430321365594864, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.015849411487579346, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.0155788017436862, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.015052041038870811, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.00988897867500782, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.19.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.20579877495765686, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.17953996360301971, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.16612029075622559, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.14375604689121246, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.09476300328969955, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.08257828652858734, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.11987603455781937, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.10920639336109161, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.09871350973844528, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.0790378525853157, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.07436096668243408, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.061693206429481506, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05323704704642296, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.04662400111556053, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.044950179755687714, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.03104192577302456, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.02583140693604946, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.02516384981572628, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.022972246631979942, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.02195034921169281, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.017143260687589645, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.018813829869031906, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.015060904435813427, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.014613698236644268, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.19.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.1730910688638687, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.16286532580852509, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.15942400693893433, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.14511699974536896, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.08162327110767365, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.07788807153701782, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.09111788123846054, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.08419081568717957, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.08271283656358719, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.07375045120716095, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.07018841803073883, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.04659629985690117, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.04047556594014168, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.039336323738098145, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.03906473144888878, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.023302622139453888, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.020708350464701653, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.020625196397304535, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.01916585862636566, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.019003601744771004, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.012579896487295628, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.013219119980931282, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.01221158355474472, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.009592375718057156, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.19.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.23497343063354492, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.22122067213058472, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.21676892042160034, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.19731535017490387, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.11046842485666275, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.10552847385406494, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.12311552464962006, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.11357354372739792, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.11191263794898987, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.09978275001049042, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.09491068124771118, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.06255049258470535, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.05422273650765419, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.05282556638121605, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.05249291658401489, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.031164124608039856, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.0269774179905653, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.026875576004385948, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.02478872984647751, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.024579914286732674, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.01620353013277054, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.015987513586878777, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.01570725254714489, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010184518061578274, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.19.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.2476763129234314, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.22316882014274597, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.21396256983280182, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.18985560536384583, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.11433200538158417, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.10487289726734161, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.13325825333595276, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.12236548215150833, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.11769295483827591, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.09897401928901672, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.09291309118270874, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.06837926059961319, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.05892271548509598, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.055261097848415375, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.05437086150050163, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.034462619572877884, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.029372865334153175, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.02918284945189953, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.02642984502017498, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.025877617299556732, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.019195687025785446, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.019423039630055428, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.018073376268148422, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.014226979576051235, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.20.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.11131291091442108, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.10401616990566254, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.10088696330785751, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.09117146581411362, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.051858168095350266, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.048890240490436554, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.059365831315517426, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.05481509864330292, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.05262792110443115, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.046488698571920395, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.04412159323692322, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.030147027224302292, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.026179973036050797, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.02485477551817894, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.024534905329346657, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.015064259991049767, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.012805589474737644, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.012668943963944912, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.011725908145308495, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.011520369909703732, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.007880008779466152, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.007886131294071674, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.007431879639625549, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005192067939788103, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.20.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.0948401466012001, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.08868584036827087, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.08570301532745361, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.07750418037176132, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.04418197646737099, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.041478902101516724, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.05103874206542969, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.04724782332777977, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.04483013227581978, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.039644189178943634, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.03769831731915474, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.025874897837638855, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.022561704739928246, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.02116468735039234, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.020827127620577812, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.01292113121598959, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.010913156904280186, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.010760040022432804, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.01000240258872509, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.00978832133114338, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.006769032217562199, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.006773501168936491, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.006336230784654617, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.004442264791578054, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.20.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.22690439224243164, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.2124684453010559, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.20700858533382416, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.18744966387748718, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.10632725059986115, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.10065139085054398, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.120710089802742, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.11094208061695099, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.107860267162323, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.0954483300447464, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.09051232784986496, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06140149012207985, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.053100619465112686, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.05090804398059845, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.050360407680273056, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.030689295381307602, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.02609146572649479, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.025875529274344444, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.023874949663877487, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.023533090949058533, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.01599428988993168, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.01575891114771366, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.015186889097094536, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.010137718170881271, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.20.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.2153710126876831, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.19120129942893982, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.1822902113199234, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.15702515840530396, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.09990110248327255, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.09072493761777878, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.11778190732002258, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.10753320157527924, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.10327455401420593, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.08376588672399521, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.07759447395801544, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06063159182667732, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05232490226626396, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.048862703144550323, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.048014890402555466, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.030585531145334244, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.02668244205415249, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.026427801698446274, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.02370450831949711, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.02319107949733734, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.01718965359032154, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.01853095553815365, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.016134856268763542, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.01444222405552864, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.20.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.17375899851322174, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.16358715295791626, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.160145103931427, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.14580510556697845, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.08188991248607635, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.07815597951412201, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.0914163812994957, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.08439388126134872, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.08299540728330612, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.0740288496017456, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.07053879648447037, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.046713199466466904, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.04059456288814545, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.03946791961789131, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.03919795900583267, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.02336009219288826, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.020793169736862183, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.02070501819252968, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.01925892010331154, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.019099902361631393, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.012616093270480633, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.01328993309289217, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.012251188978552818, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.009667095728218555, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.20.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.23328982293605804, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.21964077651500702, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.21533966064453125, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.19599997997283936, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.10968572646379471, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.10478957742452621, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.12222914397716522, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.11270147562026978, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.11110716313123703, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.09913870692253113, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.09431435912847519, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.062133073806762695, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.05380077287554741, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.05241925269365311, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.05209019035100937, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.0309729166328907, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.02673342078924179, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.026627494022250175, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.024563631042838097, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.024353845044970512, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.016124069690704346, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.015788676217198372, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.01563436910510063, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.009970546700060368, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.20.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.25205278396606445, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.2262209802865982, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.21665528416633606, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.19180703163146973, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.1162082701921463, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.10630885511636734, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.13547679781913757, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.12437786161899567, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.11980685591697693, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.10015440732240677, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.0939709022641182, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.06959354132413864, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.0599038265645504, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.05618520826101303, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.055292222648859024, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.03515465185046196, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.029919439926743507, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.02974482625722885, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.026851793751120567, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.026293447241187096, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.019684923812747, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.01984231546521187, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.01855015940964222, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.014609118923544884, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.21.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.11847374588251114, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.1107049435377121, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.10749677568674088, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.0971858948469162, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.055234216153621674, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.05210814252495766, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.0630321279168129, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.05821036919951439, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.056072842329740524, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.04953290522098541, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.04705753177404404, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.0320562869310379, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.027861572802066803, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.026515237987041473, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.02618473395705223, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.016025463119149208, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.013773888349533081, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.013641382567584515, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.012644928880035877, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.01244132500141859, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.00845341570675373, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.008630567230284214, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.008006317541003227, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005907756742089987, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.21.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.0973697304725647, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.09103168547153473, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.08778879791498184, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.07938369363546371, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.04526563733816147, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.04243285953998566, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.0525190569460392, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.048658888787031174, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.045941803604364395, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.04064028337597847, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.03869329392910004, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.02662729285657406, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.02321568690240383, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.02169749327003956, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.02133079618215561, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.01331260148435831, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.011206884868443012, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.011037169024348259, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.010278661735355854, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.010045893490314484, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.006988735403865576, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.0070047322660684586, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.006525666918605566, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.004627489484846592, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.21.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.2295355647802353, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.21497786045074463, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.20965538918972015, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.1899777203798294, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.10744857043027878, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.101863332092762, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.12171898037195206, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.1118091493844986, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.10900998115539551, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.09653788059949875, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.09160120040178299, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.061924975365400314, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05348999798297882, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.05142112076282501, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.050916288048028946, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.030936067923903465, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.02633621357381344, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.026137247681617737, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.02411273680627346, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.023794136941432953, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.01609588786959648, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.015869133174419403, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.015312397852540016, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.010194548405706882, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.21.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.22012116014957428, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.19528745114803314, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.18700529634952545, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.15677177906036377, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.10252612829208374, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.09339126199483871, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.11945314705371857, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.10913334041833878, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.10578455030918121, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.08483254164457321, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.07614242285490036, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06202157959342003, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05326017737388611, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.05023517087101936, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.04951029643416405, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.031404100358486176, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.027413710951805115, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.027215851470828056, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.02412225678563118, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.023671796545386314, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.017819011583924294, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.018911102786660194, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.0169500894844532, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.014752708375453949, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.21.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.1680179387331009, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.1581486463546753, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.15487492084503174, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.1409958004951477, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.0791829377412796, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.0755862221121788, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.08826421946287155, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.08156535774469376, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.08024847507476807, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.07159564644098282, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.06816107034683228, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.04511350765824318, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.03921511396765709, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.03814154863357544, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.0378962866961956, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.022558273747563362, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.02005586214363575, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.019975798204541206, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.018567854538559914, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.01841391809284687, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.012166409753262997, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.012758697383105755, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.011816511861979961, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.009223400615155697, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.21.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.2311224341392517, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.21770356595516205, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.21346281468868256, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.1943650245666504, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.10858752578496933, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.10379575937986374, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.12087341398000717, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.11153693497180939, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.11001137644052505, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.09820235520601273, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.09343588352203369, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.06146685779094696, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.053238172084093094, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.05190013721585274, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.05158710852265358, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.03062683343887329, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.02644958533346653, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.026356443762779236, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.02431025356054306, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.024110661819577217, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.01595931127667427, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.015594509430229664, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.015499242581427097, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.009827039204537868, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.21.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.2441592812538147, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.21888205409049988, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.2089901864528656, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.184992253780365, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.11229288578033447, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.10239047557115555, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.1326615810394287, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.12105803936719894, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.11578796803951263, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.09660737216472626, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.09080781787633896, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.06755838543176651, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.05813948065042496, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.05421040952205658, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.05325922742486, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.033944688737392426, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.02876407653093338, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.0285342987626791, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.025751110166311264, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.025143209844827652, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.018651196733117104, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.01899012178182602, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.017387166619300842, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.013778331689536572, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.22.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.1210164874792099, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.11325252801179886, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.11010074615478516, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.09973704814910889, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.05635427311062813, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.05330153554677963, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.0641762837767601, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.05923357233405113, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.05719035863876343, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.050674114376306534, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.0481412410736084, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.03261126950383186, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.028293391689658165, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.026986554265022278, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.02667880803346634, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.016278628259897232, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.013917946256697178, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.013787037692964077, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.012772073969244957, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.012575558386743069, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.008513870649039745, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.008553632535040379, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.008063746616244316, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005661310162395239, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.22.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.09834979474544525, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.09205776453018188, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.08899127691984177, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.08060035854578018, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.04569529742002487, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.04298663139343262, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.05279793590307236, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.04885926842689514, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.04636678844690323, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.04112783819437027, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.03914535790681839, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.026701003313064575, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.023292070254683495, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.02188512310385704, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.02153751626610756, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.01333874836564064, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.011261573992669582, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.01110476441681385, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.010337119922041893, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.010122692212462425, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.006972647737711668, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.006957802455872297, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.0065337251871824265, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.004529339261353016, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.22.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.22845637798309326, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.21433071792125702, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.2089395821094513, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.1896352618932724, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.10704819858074188, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.10148531198501587, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.12149839103221893, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.11173039674758911, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.10854750126600266, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.09641004353761673, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.09163706749677658, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06176965683698654, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05349826067686081, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.051225077360868454, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.050676196813583374, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.030870230868458748, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.02619134448468685, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.025963682681322098, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.024009570479393005, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.0236589964479208, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.01606513373553753, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.015741486102342606, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.015252158977091312, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.00998948235064745, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.22.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.2208547741174698, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.2007996290922165, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.1941658854484558, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.17201419174671173, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.10301484167575836, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.09561865776777267, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.11771337687969208, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.10765086859464645, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.10522635281085968, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.0884244441986084, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.08317992836236954, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06087490916252136, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05245308578014374, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.05036386102437973, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.04986472427845001, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.030988072976469994, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.027461271733045578, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.027323545888066292, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.02471957355737686, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.024410953745245934, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.017838044092059135, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.018799223005771637, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.017222773283720016, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.014758657664060593, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.22.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.16767939925193787, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.15782761573791504, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.15458524227142334, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.14079268276691437, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.07891339063644409, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.07536784559488297, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.08787352591753006, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.08127976953983307, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.07997526973485947, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.0713653415441513, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.06799528747797012, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.04489685967564583, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.03901296481490135, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.0379498228430748, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.03769797831773758, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.02241402305662632, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.019852519035339355, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.019772648811340332, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.018360331654548645, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.018209362402558327, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.011990430764853954, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.012490217573940754, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.011645854450762272, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.008874735794961452, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.22.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.22864551842212677, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.21542927622795105, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.2112855762243271, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.1924927532672882, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.10745188593864441, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.10269296169281006, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.11946411430835724, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.11032350361347198, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.10883338749408722, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.0971972718834877, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.09254424273967743, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.060684140771627426, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.05266214534640312, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.051344119012355804, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.051040858030319214, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.030240152031183243, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.02616328001022339, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.02607060596346855, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.024060005322098732, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.02386019378900528, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.015694882720708847, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.01542375236749649, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.01523515209555626, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.009709784761071205, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.22.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.2440515160560608, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.21782022714614868, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.20766735076904297, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.18400326371192932, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.11191689968109131, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.10170495510101318, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.13177071511745453, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.120842844247818, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.11567860096693039, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.09604306519031525, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.09023670107126236, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.06731238216161728, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.05795510485768318, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.05391867086291313, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.05294375866651535, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.033943288028240204, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.028392991051077843, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.028175583109259605, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.025303345173597336, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.024674199521541595, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.018727902323007584, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.018496563658118248, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.017479047179222107, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.01310445461422205, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.23.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.11792857944965363, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.11050175130367279, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.10744169354438782, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.09738931804895401, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.05500582233071327, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.05204081907868385, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.06259096413850784, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.057790521532297134, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.05578362196683884, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.04951338842511177, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.04714386165142059, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.03181586414575577, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.027612589299678802, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.02634858898818493, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.026042766869068146, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.015893306583166122, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.013558311387896538, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.013433147221803665, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.012453893199563026, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.012261329218745232, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.008318795822560787, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.008308157324790955, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.0078963004052639, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005459399428218603, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.23.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.09847376495599747, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.09221632033586502, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.08918672055006027, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.08074108511209488, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.0457807332277298, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.04308415204286575, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.05268697068095207, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.04882577434182167, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.046439070254564285, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.041191667318344116, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.039198242127895355, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.026686886325478554, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.02329033985733986, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.021921774372458458, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.021590515971183777, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.013332000933587551, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.011297602206468582, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.011145680211484432, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.010373525321483612, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.010166564956307411, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.006980542093515396, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.006984962150454521, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.006556967739015818, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.004579669330269098, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.23.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.23380844295024872, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.21960054337978363, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.21438781917095184, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.19452707469463348, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.10959993302822113, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.10411403328180313, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.12397705018520355, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.11393828690052032, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.1111125722527504, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.09876707941293716, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.09394435584545135, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06303659081459045, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.054491911083459854, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.052433840930461884, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.051937755197286606, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.03149709850549698, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.026798272505402565, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.0265976469963789, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.02457645907998085, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.024255773052573204, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.01638261415064335, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.0160455834120512, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.015615708194673061, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.010193036869168282, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.23.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.2158726304769516, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.1957302689552307, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.18766677379608154, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.16939875483512878, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.10050228238105774, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.09228065609931946, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.11825783550739288, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.10849922150373459, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.10350970923900604, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.08824407309293747, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.08367286622524261, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06095253303647041, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05235947296023369, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.04870843142271042, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.04782579094171524, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.030584285035729408, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.025809934362769127, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.025492297485470772, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.02340788021683693, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.02285740338265896, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.01651659794151783, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.017032410949468613, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.015343756414949894, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.012315475381910801, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.23.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.16746972501277924, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.15770959854125977, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.15452717244625092, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.14075687527656555, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.0788964182138443, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.07536010444164276, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.08785682171583176, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.08117583394050598, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.07995434105396271, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.07138103246688843, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.06800131499767303, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.044849246740341187, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.03901662304997444, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.03797969967126846, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.037738896906375885, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.022428687661886215, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.01995409093797207, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.01988346129655838, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.018481532111763954, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.018331186845898628, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.012075082398951054, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.0126698212698102, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.011739233508706093, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.00913912057876587, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.23.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.23173965513706207, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.21816442906856537, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.2139713168144226, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.19498306512832642, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.10882404446601868, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.10406006872653961, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.1209927573800087, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.11170604079961777, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.11022159457206726, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.09847496449947357, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.09375665336847305, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.061470095068216324, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.0533067025244236, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.052008919417858124, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.05170233175158501, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.03065640851855278, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.02652561292052269, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.026433393359184265, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.024403361603617668, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.02420380711555481, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.015984047204256058, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.01565452665090561, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.015542454086244106, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.009907382540404797, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.23.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.23840928077697754, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.2120136022567749, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.20125186443328857, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.17819473147392273, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.1092751994729042, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.09870985150337219, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.1300533264875412, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.11926747858524323, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.11323505640029907, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.09356621652841568, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.08813155442476273, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.0665702372789383, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.057348474860191345, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.05283350870013237, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.05173046141862869, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.03365714102983475, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.02807786501944065, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.027805954217910767, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.025066515430808067, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.024353012442588806, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.018687574192881584, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.0187120009213686, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.017290715128183365, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.01355503499507904, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.24.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.12518353760242462, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.11733844131231308, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.11430677026510239, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.10360641777515411, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.058417193591594696, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.055368900299072266, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.06627939641475677, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.0611361488699913, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.05923917144536972, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.052595414221286774, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.05011649429798126, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.033719781786203384, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.02925216592848301, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.02801535278558731, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.02771790139377117, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.01686142571270466, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.014500880613923073, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.014381321147084236, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.01334006804972887, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.013157661072909832, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.008873283863067627, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.008976848796010017, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.008464319631457329, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.006063221953809261, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.24.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.10333450883626938, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.09681699424982071, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.0938330814242363, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.08495572954416275, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.04803856089711189, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.04529530927538872, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.055053748190402985, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.05101598799228668, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.04871748015284538, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.04324593394994736, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.041137825697660446, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.02787886932492256, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.024330588057637215, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.02299817092716694, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.02267460525035858, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.013935310766100883, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.011842809617519379, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.011696189641952515, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.010878151282668114, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.01067402120679617, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.0072888475842773914, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.007297637406736612, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.006876643281430006, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.004774791654199362, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.24.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.23847201466560364, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.22399918735027313, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.2190088927745819, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.19890707731246948, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.11183564364910126, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.10640323907136917, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.12596873939037323, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.11593865603208542, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.11332344263792038, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.10087201744318008, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.09590868651866913, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06404665112495422, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05541060492396355, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.05347311869263649, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.053003884851932526, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.03194180503487587, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.02726791612803936, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.02709929272532463, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.02502245083451271, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.02473076991736889, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.01652875356376171, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.016217844560742378, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.015810316428542137, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.010210534557700157, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.24.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.22039948403835297, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.20177491009235382, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.19568924605846405, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.1725226789712906, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.10211195796728134, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.09523998200893402, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.11729172617197037, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.10600437968969345, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.10408060252666473, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.0883978009223938, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.08174800127744675, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.060168009251356125, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05191648006439209, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.05017798766493797, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.04975089058279991, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.030908532440662384, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.027774866670370102, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.027661608532071114, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.025264915078878403, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.025024347007274628, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.018110821023583412, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.019405893981456757, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.017612114548683167, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.015663720667362213, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.24.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.1631382256746292, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.1535298377275467, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.15040625631809235, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.13698425889015198, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.07680626213550568, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.0733516663312912, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.08551280945539474, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.07901666313409805, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.0778060182929039, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.06944287568330765, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.06619340181350708, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.0436372235417366, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.03793516755104065, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.03693216294050217, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.036702416837215424, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.021813973784446716, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.01933898590505123, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.019266655668616295, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.017890796065330505, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.01774757169187069, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.011683373712003231, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.012187685817480087, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.011361884884536266, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.008691107854247093, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.24.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.22739028930664062, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.21422167122364044, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.21005426347255707, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.19146932661533356, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.10679610073566437, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.10210195928812027, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.11879104375839233, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.10961532592773438, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.10819795727729797, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.09663897007703781, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.09207922965288162, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.06036193668842316, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.05230521038174629, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.05103031545877457, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.050725698471069336, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.030131462961435318, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.026030106469988823, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.02594091184437275, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.02393839694559574, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.023745162412524223, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.015756985172629356, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.015366591513156891, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.015318630263209343, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.009723612107336521, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.24.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.23277589678764343, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.20887981355190277, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.1999061405658722, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.17757648229599, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.10673938691616058, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.09758854657411575, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.12457219511270523, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.11450163275003433, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.11008290946483612, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.09218733012676239, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.08672746270895004, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.06365074217319489, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.054893746972084045, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.05134088173508644, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.050495948642492294, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.03196822106838226, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.02695685252547264, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.02677105739712715, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.024118751287460327, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.023562893271446228, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.017421012744307518, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.017410853877663612, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.0162874236702919, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.012232190929353237, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.25.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.12024899572134018, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.11278392374515533, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.10974487662315369, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.09961795061826706, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.05606308951973915, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.05311359092593193, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.06384972482919693, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.058850232511758804, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.05684443190693855, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.050565484911203384, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.04820822551846504, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.032444022595882416, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.028127066791057587, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.02686251699924469, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.02655612863600254, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.016209837049245834, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.013829128816723824, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.013705650344491005, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.012714117765426636, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.012523681856691837, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.008473855443298817, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.008466255851089954, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.008058225736021996, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.00556949432939291, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.25.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.09947418421506882, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.09323865175247192, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.09022196382284164, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.08175671845674515, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.0462566576898098, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.043568387627601624, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.05324174091219902, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.04929476976394653, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.046921394765377045, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.041664969176054, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.039707083255052567, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.026983898133039474, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.02352299913764, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.022154608741402626, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.02182372286915779, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.013479447923600674, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.011423449963331223, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.011275475844740868, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.010500527918338776, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.010293280705809593, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.007067885249853134, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.0070833927020430565, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.00664871372282505, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.004669340327382088, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.25.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.23310190439224243, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.21907876431941986, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.2142418473958969, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.1947200745344162, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.10919927060604095, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.10393298417329788, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.12301907688379288, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.11310794949531555, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.11070498079061508, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.09855634719133377, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.09387795627117157, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.0624907985329628, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05401451885700226, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.052207667380571365, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.051788266748189926, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.031178804114460945, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.0266164168715477, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.02646460570394993, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.024427102878689766, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.02415185049176216, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.016127927228808403, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.01578664593398571, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.015432234853506088, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.00992120336741209, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.25.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.21696364879608154, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.19610202312469482, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.18940815329551697, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.1580447256565094, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.1018034890294075, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.09408213198184967, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.11602181196212769, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.10627876222133636, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.10400614887475967, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.08489640057086945, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.07547838240861893, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.05973142758011818, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05152474716305733, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.04949282854795456, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.04901450499892235, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.030027929693460464, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.026568375527858734, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.026423806324601173, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.023372165858745575, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.023062165826559067, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.016648169606924057, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.017695775255560875, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.016029968857765198, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.013460488058626652, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.25.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.1674976944923401, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.15774033963680267, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.1545817106962204, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.1409432291984558, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.07883884757757187, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.07535010576248169, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.08770305663347244, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.08105918765068054, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.07987913489341736, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.07132227718830109, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.06797847151756287, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.04471423104405403, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.03887363523244858, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.037871528416872025, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.03763763979077339, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.022383319213986397, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.019741829484701157, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.019674669951200485, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.018252480775117874, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.01810522750020027, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.011999533511698246, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.012311916798353195, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.011679809540510178, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.008637704886496067, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.25.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.23019418120384216, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.21704953908920288, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.21285614371299744, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.19413059949874878, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.10814723372459412, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.10342501103878021, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.12031234800815582, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.11095979064702988, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.10953610390424728, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.09790541231632233, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.0932871550321579, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.06107347831130028, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.05295421928167343, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.05168944597244263, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.051392458379268646, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.03054654970765114, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.02640489861369133, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.026314089074730873, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.024305937811732292, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.02411619946360588, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.016097888350486755, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.01563328504562378, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.01567188650369644, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.009980035945773125, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.25.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.23543807864189148, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.2109294980764389, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.2016696035861969, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.17915654182434082, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.10782413929700851, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.09843743592500687, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.12592966854572296, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.11596750468015671, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.11132553219795227, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.09315717220306396, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.08758873492479324, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.06421945989131927, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.05570022016763687, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.05196913331747055, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.051040828227996826, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.032222867012023926, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.027392884716391563, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.027197618037462234, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.024554317817091942, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.023971639573574066, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.01726182922720909, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.01787484809756279, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.01601434499025345, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.012730193324387074, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.26.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.1194184273481369, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.11213962733745575, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.10917196422815323, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.09919576346874237, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.055732611566782, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.05281741917133331, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.06337469816207886, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.058402448892593384, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.056472111493349075, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.05031633749604225, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.04805704951286316, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.032196249812841415, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.027881046757102013, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.026664281263947487, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.02637580782175064, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.016100579872727394, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.013719265349209309, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.01359914243221283, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.012628836557269096, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.012444346211850643, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.008457696065306664, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.008377344347536564, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.008055402897298336, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.00550507940351963, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.26.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.10120700299739838, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.09498440474271774, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.09210056811571121, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.08362912386655807, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.047144416719675064, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.04451356455683708, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.05403190478682518, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.04998217523097992, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.047797318547964096, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.04254264011979103, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.0405578538775444, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.027384597808122635, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.023847365751862526, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.0225678738206625, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.022259199991822243, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.013680710457265377, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.01160514447838068, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.011471611447632313, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.01068077702075243, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.010487313382327557, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.007153802085667849, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.007129752542823553, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.006758494768291712, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.004647326190024614, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.26.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.23494720458984375, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.22105208039283752, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.216325581073761, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.19686079025268555, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.11018167436122894, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.10500980168581009, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.12361904233694077, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.11378571391105652, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.11163531988859177, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.09956355392932892, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.09482693672180176, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06284376233816147, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05431865528225899, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.052645243704319, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.05223456397652626, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.03132099285721779, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.026743846014142036, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.02659410983324051, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.024547122418880463, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.02428002841770649, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.0161135196685791, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.015695681795477867, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.015459271147847176, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.009667690843343735, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.26.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.2291116565465927, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.20697984099388123, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.20004583895206451, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.16991356015205383, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.10752545297145844, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.09932251274585724, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.12184597551822662, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.11186665296554565, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.10982739925384521, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.09020697325468063, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.08115512132644653, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06292912364006042, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05434221029281616, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.05241663381457329, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.051954686641693115, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.0318005234003067, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.028314338997006416, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.02819335274398327, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.025047626346349716, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.02475884184241295, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.01796233467757702, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.01903526484966278, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.01739409752190113, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.014706622809171677, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.26.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.17187204957008362, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.16184493899345398, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.15865197777748108, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.14468953013420105, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.08085355162620544, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.07729010283946991, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.08973289281129837, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.08305461704730988, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.08192746341228485, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.07318241894245148, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.06971792131662369, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.0457397922873497, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.03980005532503128, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.03880476951599121, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.03857443854212761, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.02282661199569702, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.020160917192697525, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.020093411207199097, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.01862991414964199, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.01848907582461834, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.012089253403246403, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.01247607171535492, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.011764682829380035, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.008651862852275372, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.26.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.23375527560710907, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.2202976793050766, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.21613216400146484, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.19717098772525787, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.10978084057569504, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.10505885630846024, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.12175674736499786, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.11262204498052597, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.11120009422302246, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.09943884611129761, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.09471424669027328, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.06185157224535942, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.05372455716133118, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.052446167916059494, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.052147455513477325, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.030807318165898323, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.02671200968325138, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.026626409962773323, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.024585282430052757, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.024389466270804405, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.015968339517712593, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.015716498717665672, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.015531974844634533, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.009881443344056606, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.26.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.23496170341968536, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.21035882830619812, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.2007584124803543, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.1785215437412262, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.10753735154867172, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.09797605127096176, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.12625019252300262, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.11619949340820312, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.11109865456819534, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.09292250871658325, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.08746795356273651, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.06422150135040283, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.0556357204914093, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.05172836780548096, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.050765540450811386, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.03213712200522423, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.02709956280887127, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.02687542885541916, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.0242425799369812, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.023627694696187973, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.01716792769730091, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.017497099936008453, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.01590084657073021, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.01218381430953741, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.27.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.11699409037828445, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.10978520661592484, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.10675888508558273, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.09702211618423462, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.054539937525987625, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.051672521978616714, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.062305137515068054, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.057396069169044495, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.05532028153538704, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.04926850274205208, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.04709300771355629, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.03169667720794678, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.027427440509200096, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.026152804493904114, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.025849763303995132, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.015858812257647514, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.013527300208806992, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.013397962786257267, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.012469020672142506, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.012278511188924313, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.008358476683497429, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.008383487351238728, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.007942743599414825, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005632652435451746, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.27.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.0999678298830986, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.0937090739607811, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.09069274365901947, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.08237828314304352, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.046506669372320175, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.04381320998072624, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.05363087356090546, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.049612682312726974, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.047156210988759995, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.04195711016654968, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.040015362203121185, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.027152229100465775, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.023667272180318832, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.022275960072875023, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.021941039711236954, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.013573823496699333, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.01148133259266615, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.011328691616654396, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.010562006384134293, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.010352345183491707, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.007112668361514807, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.007106497418135405, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.0066916802898049355, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.004662726074457169, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.27.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.2330365777015686, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.21908262372016907, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.21432366967201233, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.1950913965702057, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.10927926003932953, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.10405788570642471, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.1229044646024704, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.11308272927999496, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.11072036623954773, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.09865272790193558, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.09398069977760315, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06243756785988808, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05395834520459175, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.05222068727016449, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.05180201306939125, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.03112521767616272, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.026567215099930763, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.02642170898616314, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.02439858205616474, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.024132253602147102, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.016049254685640335, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.015692507848143578, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.015377246774733067, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.009780306369066238, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.27.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.21600662171840668, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.19092535972595215, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.1826533079147339, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.15646561980247498, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.10053852945566177, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.09105318784713745, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.11672380566596985, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.10640738904476166, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.10345588624477386, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.08339406549930573, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.07619407773017883, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.0602746456861496, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05182743817567825, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.04917126148939133, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.048530563712120056, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.03060023859143257, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.026873445138335228, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.026689542457461357, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.02372453361749649, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.023333366960287094, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.017496399581432343, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.018570860847830772, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.016715949401259422, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.01458328403532505, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.27.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.17681309580802917, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.16657328605651855, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.16336765885353088, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.1490425169467926, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.08314795792102814, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.0795198604464531, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.0924675464630127, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.08539925515651703, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.08422492444515228, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.075307697057724, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.07186219096183777, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.04713824763894081, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.040938202291727066, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.03993040695786476, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.03969747945666313, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.023574301972985268, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.020805809646844864, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.02074025198817253, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.019254613667726517, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.01911151222884655, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.012643732130527496, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.012961560860276222, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.012319302186369896, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.00909431092441082, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.27.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.23381437361240387, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.22042298316955566, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.21626153588294983, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.19737844169139862, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.10981936007738113, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.10510677844285965, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.12188581377267838, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.11262282729148865, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.11124005913734436, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.09954137355089188, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.09486310929059982, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.061940956860780716, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.053764890879392624, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.052507150918245316, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.05220368504524231, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.030933260917663574, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.026806924492120743, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.026716699823737144, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.02469661459326744, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.024499114602804184, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.01623290590941906, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.0158486757427454, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.01580766960978508, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010094073601067066, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.27.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.23335802555084229, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.20807048678398132, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.1979185938835144, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.17600640654563904, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.10666104406118393, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.09670503437519073, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.125784769654274, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.11591368913650513, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.11032037436962128, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.09182636439800262, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.08647706359624863, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.064040407538414, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.0554254949092865, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.05124855041503906, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.0502321794629097, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.031993672251701355, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.02673671394586563, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.026492547243833542, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.023818397894501686, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.02315615676343441, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.017038721591234207, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.017159920185804367, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.015697110444307327, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.01172550581395626, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.28.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.12242569774389267, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.11504513025283813, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.11193624138832092, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.1018812283873558, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.05712512508034706, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.05419936403632164, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.06497909128665924, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.05992273613810539, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.05790418013930321, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.05163133516907692, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.049317244440317154, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.03302369639277458, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.028626782819628716, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.027366694062948227, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.027072744444012642, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.01651974394917488, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.014096387661993504, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.013969882391393185, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.012984502129256725, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.012800075113773346, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.008627998642623425, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.008628569543361664, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.008205953054130077, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.0057020001113414764, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.28.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.10171285271644592, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.09546056389808655, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.09242086112499237, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.08399450778961182, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.04735254868865013, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.04466605931520462, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.05446216091513634, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.050447214394807816, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.04801523685455322, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.04276765510439873, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.040796659886837006, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.02758835069835186, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.024087509140372276, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.022695215418934822, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.02235802635550499, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.013795563019812107, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.011705254204571247, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.01155287865549326, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.01078255195170641, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.010572542436420918, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.007232506759464741, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.007256846874952316, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.006808277685195208, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.00478738360106945, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.28.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.22994855046272278, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.21637935936450958, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.2117043286561966, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.19280217587947845, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.1077343225479126, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.10270088911056519, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.12110675126314163, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.11152861267328262, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.109153613448143, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.09742272645235062, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.09282632917165756, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06156453490257263, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.053256310522556305, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.05150516703724861, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.05107271671295166, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.03073291666805744, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.02625429257750511, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.026099976152181625, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.02413511835038662, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.023865513503551483, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.015874193981289864, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.015552871860563755, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.015197820961475372, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.009765297174453735, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.28.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.20950450003147125, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.1855728179216385, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.1778157651424408, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.14744648337364197, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.0974656492471695, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.08863824605941772, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.11256708949804306, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.10296899825334549, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.1003904640674591, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.07938776910305023, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.0715998187661171, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.05795770138502121, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.0499630942940712, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.04747113585472107, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.04686663672327995, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.02921343222260475, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.02553493343293667, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.025382990017533302, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.022106628865003586, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.02172006107866764, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.016274122521281242, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.017156371846795082, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.015517409890890121, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.013035330921411514, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.28.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.17280113697052002, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.16285210847854614, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.15971136093139648, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.14569425582885742, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.08118831366300583, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.07764626294374466, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.09019424766302109, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.08341974020004272, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.08225975185632706, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.07357249408960342, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.07009803503751755, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.04596114903688431, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.04003599286079407, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.039025843143463135, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.038789719343185425, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.023040233179926872, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.020338911563158035, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.02026261016726494, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.018821800127625465, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.018678003922104836, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.01233735866844654, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.012667395174503326, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.012008598074316978, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.008887151256203651, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.28.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.22877943515777588, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.21582333743572235, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.21177154779434204, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.19338877499103546, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.10730135440826416, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.10271529108285904, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.11921517550945282, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.1100735142827034, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.10869686305522919, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.09732040762901306, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.09279434382915497, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.060519371181726456, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.05256379768252373, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.05132220312952995, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.051026273518800735, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.030309204012155533, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.02619551494717598, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.026103463023900986, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.024135107174515724, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.02395094931125641, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.01592901721596718, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.015473002567887306, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.015518075786530972, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.009847333654761314, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.28.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.2219153493642807, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.19472521543502808, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.18245942890644073, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.1611652672290802, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.10125470161437988, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.0896155834197998, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.12277107685804367, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.11321544647216797, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.10538963228464127, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.08598918467760086, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.08095581829547882, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.06264699995517731, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.0548323430120945, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.04933812841773033, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.047914981842041016, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.03167030215263367, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.026732726022601128, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.02634011209011078, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.02388257347047329, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.02303086780011654, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.01716923527419567, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.018625469878315926, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.015424646437168121, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.014026996679604053, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.29.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.1270226389169693, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.11949443817138672, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.11651039123535156, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.10612060874700546, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.059335857629776, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.05639345571398735, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.06721094250679016, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.06201944127678871, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.06012094393372536, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.05370759963989258, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.051306918263435364, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.03416862338781357, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.029622787609696388, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.028406308963894844, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.028117690235376358, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.017088402062654495, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.014606863260269165, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.014492985792458057, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.013471914455294609, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.013291406445205212, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.008963186293840408, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.00889748428016901, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.008566932752728462, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005838025361299515, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.29.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.10847456008195877, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.10195101052522659, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.09906335175037384, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.0901368111371994, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.05056128650903702, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.047861289232969284, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.05769723281264305, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.0533599779009819, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.05124750733375549, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.04575042799115181, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.0436527319252491, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.02923732064664364, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.02548210509121418, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.02421228401362896, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.023907268419861794, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.014620142057538033, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.012448703870177269, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.012315469793975353, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.011476296000182629, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.01129002682864666, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.007634105626493692, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.007622984237968922, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.007238972932100296, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.004979568999260664, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.29.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.23278914391994476, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.21939624845981598, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.21480736136436462, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.19580262899398804, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.1091027557849884, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.10413499176502228, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.12209023535251617, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.11251545697450638, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.1104792058467865, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.09880828857421875, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.09417878836393356, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.0620056614279747, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05370761826634407, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.05211983993649483, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.0517338402569294, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.030941400676965714, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.026476673781871796, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.026342574506998062, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.024351827800273895, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.024110540747642517, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.015920277684926987, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.015522869303822517, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.015306837856769562, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.009571176022291183, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.29.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.22377754747867584, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.2040320336818695, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.19794417917728424, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.17312616109848022, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.10478874295949936, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.09749399870634079, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.11887065321207047, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.10906587541103363, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.10713385790586472, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.0903962105512619, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.0830351710319519, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06137050315737724, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.053083259612321854, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.05113465338945389, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.050691477954387665, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.031070111319422722, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.027708716690540314, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.02759396657347679, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.024989433586597443, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.024714238941669464, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.017650006338953972, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.01874524913728237, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.017067765817046165, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.014573300257325172, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.29.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.17345431447029114, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.16346463561058044, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.16033996641635895, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.14626723527908325, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.08145814388990402, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.07793981581926346, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.09029879420995712, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.08360417932271957, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.08253166824579239, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.07384184747934341, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.07028911262750626, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.04601474106311798, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.0400887131690979, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.039111386984586716, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.03888114169239998, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.022997088730335236, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.020319117233157158, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.020251644775271416, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.018795736134052277, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.01865546777844429, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.012162330560386181, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.01256478764116764, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.011844024993479252, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.008725227788090706, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.29.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.23165319859981537, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.21853359043598175, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.21455320715904236, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.1957414150238037, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.10870035737752914, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.10408391058444977, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.12031810730695724, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.11142381280660629, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.1100969910621643, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.09857383370399475, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.09382874518632889, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.06110628321766853, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.0531887486577034, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.051953595131635666, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.05166155472397804, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.030476806685328484, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.02644977532327175, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.026365092024207115, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.02436099760234356, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.024176999926567078, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.01575532928109169, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.015527362935245037, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.015342236496508121, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.009750387631356716, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.29.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.228806272149086, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.20174242556095123, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.189795583486557, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.168114572763443, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.10422322154045105, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.09298530220985413, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.12588343024253845, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.11583695560693741, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.10836014896631241, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.08891858160495758, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.08391575515270233, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.06409173458814621, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.05553993955254555, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.0503118596971035, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.049011461436748505, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.032251447439193726, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.026565320789813995, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.02620816044509411, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.023595932871103287, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.022762266919016838, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.017506977543234825, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.017641736194491386, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.01586710661649704, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.012434594333171844, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.30.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.1272582858800888, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.11958207190036774, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.11655014753341675, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.10603222250938416, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.05940078943967819, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.05640529468655586, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.06744907796382904, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.06219707429409027, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.060204360634088516, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.05373278260231018, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.05134816840291023, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.034282930195331573, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.02972346544265747, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.028481826186180115, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.028169093653559685, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.01717018149793148, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.014696192927658558, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.014575078152120113, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.013555814512073994, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.013371611014008522, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.009022689424455166, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.009038579650223255, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.008613910526037216, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.0060341814532876015, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.30.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.10984597355127335, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.10319250077009201, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.10011598467826843, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.09106945246458054, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.05116088315844536, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.04838072508573532, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.0585755817592144, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.05418579280376434, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.05187298730015755, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.04624241590499878, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.04412063583731651, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.029669813811779022, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.02585386112332344, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.024512657895684242, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.024181850254535675, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.01483668852597475, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.012609454803168774, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.012461341917514801, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.011613109149038792, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.011407957412302494, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.007760100066661835, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.007738762069493532, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.00734059838578105, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005055512301623821, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.30.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.23491041362285614, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.22125498950481415, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.2165672779083252, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.19736860692501068, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.1100846529006958, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.10505630075931549, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.12329631298780441, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.1136627122759819, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.11153898388147354, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.09964586794376373, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.09488887339830399, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06262291967868805, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05424392968416214, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.05261148884892464, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.05222437530755997, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.031233251094818115, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.026766760274767876, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.026631727814674377, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.024603573605418205, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.024362191557884216, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.01608697883784771, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.015746204182505608, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.015468030236661434, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.009808260947465897, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.30.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.20914743840694427, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.18553785979747772, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.17766991257667542, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.15250158309936523, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.09727879613637924, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.0886102244257927, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.11263466626405716, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.10294197499752045, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.1001918688416481, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.08058023452758789, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.07498346269130707, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.0581890270113945, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05016358569264412, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.047627221792936325, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.04701666906476021, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.029523737728595734, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.026041241362690926, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.025872012600302696, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.022935181856155396, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.022561658173799515, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.016836244612932205, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.017973577603697777, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.01608065515756607, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.014116354286670685, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.30.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.1786816567182541, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.16845133900642395, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.16527794301509857, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.15087080001831055, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.08396344631910324, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.08033718913793564, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.09322640299797058, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.08614552766084671, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.0850447416305542, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.07611940801143646, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.07256323099136353, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.047501806169748306, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.041337866336107254, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.04034699127078056, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.04011816903948784, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.023824317380785942, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.021032514050602913, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.020966216921806335, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.01946675404906273, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.01932866871356964, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.012775914743542671, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.013097092509269714, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.012460343539714813, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.00919925607740879, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.30.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.23430348932743073, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.2210647165775299, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.21695035696029663, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.19802874326705933, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.11001906543970108, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.10533489286899567, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.12187686562538147, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.11274603009223938, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.11141471564769745, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.09975635260343552, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.09498749673366547, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.061948761343955994, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.05386811122298241, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.05262048915028572, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.05232326313853264, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.03099542111158371, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.026861805468797684, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.026779064908623695, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.024756720289587975, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.0245722196996212, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.016259625554084778, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.01588275283575058, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.015849029645323753, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010131516493856907, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.30.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.22512350976467133, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.1983119398355484, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.18622499704360962, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.16494496166706085, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.10262522846460342, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.09133938699960709, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.12427624315023422, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.11446820199489594, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.10664582252502441, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.08741574734449387, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.0825282484292984, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.06341022253036499, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.054856572300195694, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.049529578536748886, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.04820692166686058, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.031780242919921875, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.026121074333786964, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.02573384903371334, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.023172453045845032, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.02231447771191597, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.01711067371070385, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.017346546053886414, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.015417457558214664, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.012159918434917927, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.31.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.12385564297437668, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.11638516187667847, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.11334183067083359, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.10312704741954803, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.057734981179237366, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.054779425263404846, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.0654902309179306, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.06048782914876938, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.058523669838905334, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.052178073674440384, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.04983223229646683, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.033217668533325195, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.02886894717812538, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.027635741978883743, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.027335725724697113, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.016616983339190483, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.01418590173125267, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.014061717316508293, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.013055465184152126, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.01286761462688446, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.00869421660900116, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.008614961057901382, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.008288886398077011, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005601485725492239, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.31.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.10740815103054047, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.1008618101477623, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.09787174314260483, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.0889272540807724, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.04998604208230972, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.04724910855293274, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.057212550193071365, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.052919428795576096, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.05067773908376694, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.04515121132135391, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.04308171570301056, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.028994537889957428, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.025269515812397003, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.02393755316734314, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.02361869066953659, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.01449830923229456, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.012308049947023392, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.012167521752417088, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.011332127265632153, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.011131701059639454, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.00757563067600131, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.007547932211309671, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.007162528112530708, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.004924069158732891, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.31.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.23276683688163757, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.21904593706130981, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.21430647373199463, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.1951199173927307, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.10894649475812912, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.10382890701293945, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.1221010610461235, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.11265792697668076, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.11036176234483719, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.09852725267410278, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.09383096545934677, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06199916452169418, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05374804884195328, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.05202794447541237, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.05161352455615997, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.030935972929000854, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.026471978053450584, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.026326008141040802, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.024328483268618584, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.024068426340818405, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.01595829427242279, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.01560982409864664, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.015328757464885712, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.00972238089889288, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.31.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.21293774247169495, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.18576693534851074, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.17640413343906403, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.15027138590812683, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.0988582894206047, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.08832885324954987, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.11540209501981735, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.10576595366001129, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.10237818956375122, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.08075733482837677, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.07338578999042511, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.05960938706994057, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.0515228696167469, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.04830709844827652, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.047538354992866516, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.030221950262784958, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.026348324492573738, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.02613607980310917, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.02299691177904606, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.022517891600728035, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.017135947942733765, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.018221989274024963, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.016156336292624474, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.014187567867338657, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.31.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.17948509752750397, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.1691713035106659, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.16596052050590515, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.1514097899198532, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.08432380110025406, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.08066065609455109, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.09358380734920502, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.08649701625108719, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.08539865165948868, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.07639633119106293, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.07283112406730652, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.04767932370305061, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.041463885456323624, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.04048130288720131, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.040246378630399704, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.023894688114523888, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.021041011437773705, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.02096792683005333, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.01945618726313114, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.019310390576720238, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.012765505351126194, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.013002986088395119, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.012444174848496914, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.00903025921434164, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.31.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.2326468676328659, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.21952055394649506, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.21541261672973633, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.19649475812911987, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.10910701006650925, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.10447724163532257, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.12104646116495132, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.11185537278652191, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.1105046197772026, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.09892389923334122, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.0943070501089096, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.06139859929680824, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.05343282222747803, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.052191589027643204, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.05190368369221687, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.030686788260936737, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.02666671946644783, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.026585619896650314, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.024573717266321182, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.02438722364604473, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.0159814041107893, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.015789397060871124, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.01556676160544157, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.01011638343334198, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.31.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.23831583559513092, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.21264642477035522, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.20279870927333832, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.18047089874744415, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.1090136244893074, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.09905921667814255, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.12839779257774353, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.11772982031106949, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.11271610110998154, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.0939626544713974, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.08852703869342804, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.06533705443143845, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.05646746978163719, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.052529796957969666, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.05156901478767395, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.0328080840408802, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.02762649953365326, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.027416501194238663, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.02469828724861145, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.024084361270070076, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.01772911287844181, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.017965374514460564, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.01645086705684662, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.012688132002949715, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.32.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.12232643365859985, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.11484307795763016, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.1118815466761589, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.1017363891005516, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.05706067755818367, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.05412442982196808, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.06480995565652847, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.059751007705926895, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.057847265154123306, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.0515662282705307, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.04923971742391586, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.03292813152074814, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.028552919626235962, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.02733219973742962, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.027034061029553413, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.016483861953020096, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.014066816307604313, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.013942398130893707, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.012954632751643658, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.012769432738423347, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.008646935224533081, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.008594287559390068, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.008251415565609932, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005652256775647402, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.32.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.10444711148738861, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.09799719601869583, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.09491920471191406, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.08623825013637543, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.048572979867458344, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.04582284763455391, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.05590631067752838, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.051720719784498215, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.049249183386564255, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.04386172443628311, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.04185470938682556, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.028330344706773758, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.024674322456121445, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.023279905319213867, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.02294037491083145, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.014184340834617615, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.012008949182927608, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.011860034428536892, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.011064489372074604, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.010851152241230011, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.007457513362169266, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.0074442001059651375, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.00702373031526804, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.00491703487932682, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.32.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.23915518820285797, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.22500395774841309, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.22030532360076904, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.20061567425727844, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.11200244724750519, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.10682941228151321, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.12539364397525787, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.11554151028394699, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.1135149747133255, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.10125882923603058, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.09641985595226288, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06367514282464981, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05512187257409096, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.053492337465286255, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.053097061812877655, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.031747881323099136, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.02717496082186699, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.027046849951148033, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.024958297610282898, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.024710891768336296, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.0163474939763546, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.01593979261815548, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.015729175880551338, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.009846456348896027, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.32.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.20113201439380646, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.17626020312309265, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.16859129071235657, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.14399345219135284, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.09315238147974014, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.083829864859581, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.10661252588033676, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.09809736162424088, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.0960095077753067, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.07618370652198792, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.06979601830244064, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.054990146309137344, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.04793332517147064, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.04572341591119766, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.045206617563962936, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.027903785929083824, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.025247182697057724, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.02511586807668209, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.02216910943388939, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.021829616278409958, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.015923546627163887, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.01767800934612751, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.015267495065927505, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.014170180074870586, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.32.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.17422929406166077, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.16415680944919586, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.16105537116527557, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.14695265889167786, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.08184738457202911, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.07831035554409027, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.09069609642028809, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.08394388109445572, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.08288814127445221, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.07412958145141602, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.0706247091293335, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.04617467150092125, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.04020235687494278, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.03924567624926567, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.03902478888630867, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.023080384358763695, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.02031668648123741, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.020255349576473236, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.0187781173735857, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.018633965402841568, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.012186006642878056, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.012452390044927597, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.011875882744789124, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.00851453933864832, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.32.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.22725069522857666, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.2143232822418213, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.2104310244321823, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.1919541209936142, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.10661692172288895, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.10209092497825623, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.11798095703125, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.10927321761846542, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.10796665400266647, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.09667391330003738, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.09207046031951904, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.059954121708869934, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.05214739963412285, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.05094144493341446, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.05066411942243576, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.029896538704633713, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.02591823972761631, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.02584371715784073, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.02386503852903843, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.02368241921067238, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.015479676425457, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.01519324816763401, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.015075592324137688, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.009517021477222443, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.32.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.2296944111585617, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.2046842873096466, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.19489166140556335, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.17360414564609528, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.10479045659303665, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.09508949518203735, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.12384021282196045, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.11375827342271805, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.10843932628631592, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.09039336442947388, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.08534087985754013, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.06309962272644043, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.05451232194900513, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.05047180503606796, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.04948318377137184, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.031704049557447433, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.02651822194457054, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.026291070505976677, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.023722907528281212, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.02308986522257328, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.01719404011964798, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.017265940085053444, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.0158986859023571, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.012132346630096436, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.33.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.1251818686723709, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.11773516237735748, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.11485162377357483, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.10459479689598083, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.058462340384721756, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.055585071444511414, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.065888911485672, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.060968007892370224, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.0592326745390892, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.05290932208299637, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.05042105168104172, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.03342125192284584, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.02909465879201889, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.02795729599893093, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.027686232700943947, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.01670554094016552, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.014295009896159172, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.014184127561748028, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.013161752372980118, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.012988626025617123, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.008702724240720272, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.008576074615120888, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.008331788703799248, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005471435841172934, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.33.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.11079109460115433, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.10423634201288223, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.1014925092458725, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.09240168333053589, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.05172121524810791, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.049081407487392426, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.05872504040598869, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.05425538122653961, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.05242742598056793, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.04678912088274956, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.04468053951859474, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.02978215180337429, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.0258743055164814, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.024736206978559494, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.024459099397063255, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.014876464381814003, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.012657202780246735, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.012539714574813843, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.011652827262878418, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.011478687636554241, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.007726605515927076, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.007622542325407267, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.007355001289397478, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.004850409924983978, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.33.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.2371089905500412, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.22339753806591034, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.21891559660434723, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.19964122772216797, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.11109514534473419, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.10621820390224457, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.12391684949398041, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.11433378607034683, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.11256402730941772, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.10063032805919647, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.09592076390981674, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06292801350355148, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05454834923148155, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.053065065294504166, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.052714888006448746, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.03136712685227394, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.026946820318698883, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.026829518377780914, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.02477237954735756, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.02455371804535389, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.01614510267972946, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.015753578394651413, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.015592715702950954, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.009704926051199436, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.33.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.21626637876033783, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.18200942873954773, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.1697780191898346, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.13748009502887726, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.09940716624259949, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.08636990934610367, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.1178351491689682, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.10841590911149979, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.10376471281051636, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.07508276402950287, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.06960698217153549, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06072167307138443, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.052467431873083115, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.048354387283325195, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.04735241085290909, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.03053000196814537, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.025987541303038597, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.02568836882710457, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.021430058404803276, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.020741648972034454, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.016853228211402893, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.017664294689893723, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.01558398175984621, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.013199098408222198, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.33.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.18273958563804626, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.1722506582736969, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.16903042793273926, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.15425150096416473, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.08581279218196869, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.08214008063077927, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.09508230537176132, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.08802974224090576, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.0869530737400055, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.0778111070394516, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.07414813339710236, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.048447299748659134, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.04214534908533096, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.0411544032394886, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.04092641919851303, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.02420843578875065, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.0212794691324234, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.02121352031826973, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.019670430570840836, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.01952468603849411, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.0127701535820961, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.013006621040403843, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.012451301328837872, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.008849980309605598, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.33.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.232953742146492, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.2197711020708084, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.21577772498130798, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.19697672128677368, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.1092735007405281, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.10463705658912659, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.12120921164751053, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.1119476854801178, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.11066953092813492, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.09908926486968994, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.09451545029878616, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.061585091054439545, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.05347235128283501, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.05224622040987015, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.05196097865700722, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.030793730169534683, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.02665950357913971, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.02658035047352314, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.02456640638411045, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.024382341653108597, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.01611216552555561, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.015737339854240417, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.015709737315773964, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010012987069785595, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.33.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.2334858775138855, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.20853881537914276, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.19834591448307037, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.17676126956939697, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.10668252408504486, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.09678879380226135, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.12655305862426758, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.1163053959608078, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.11039178818464279, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.09224529564380646, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.08703862875699997, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.06422868371009827, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.05575906112790108, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.05140483006834984, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.05034250393509865, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.03218747675418854, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.027004631236195564, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.02672741189599037, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.02418077178299427, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.02349248342216015, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.01721973717212677, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.017593437805771828, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.015823934227228165, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.012291919440031052, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.34.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.12282929569482803, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.11541083455085754, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.11248506605625153, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.1022612452507019, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.057352546602487564, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.05440836027264595, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.06484642624855042, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.05999203771352768, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.05812690779566765, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.05181865766644478, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.04935408756136894, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.03290468454360962, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.028656097128987312, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.027440598234534264, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.027145612984895706, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.016453128308057785, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.0140687832608819, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.013948062434792519, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.012943916954100132, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.012757901102304459, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.008587079122662544, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.008509613573551178, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.00819442793726921, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005491976626217365, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.34.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.10570192337036133, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.09934668242931366, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.09645599126815796, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.08772452920675278, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.049278028309345245, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.046624358743429184, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.05639725923538208, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.05206485092639923, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.049953047186136246, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.04455173760652542, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.042514316737651825, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.028586192056536674, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.02485949546098709, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.023590127006173134, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.02328205667436123, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.014288710430264473, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.012098131701350212, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.011965179815888405, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.01114022359251976, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.010945768095552921, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.0074499621987342834, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.007368381600826979, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.00704394094645977, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.0047420538030564785, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.34.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.23376667499542236, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.22007963061332703, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.2156408727169037, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.19634994864463806, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.10955571383237839, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.10463067889213562, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.12245059013366699, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.11292486637830734, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.11104731261730194, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.09912658482789993, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.0944024994969368, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.062149640172719955, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.0538545660674572, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.05233865603804588, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.051984064280986786, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.031005309894680977, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.026592755690217018, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.026472680270671844, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.024427836760878563, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.024200355634093285, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.01594056561589241, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.015584304928779602, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.015369663946330547, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.00961824506521225, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.34.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.2011721432209015, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.1764681339263916, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.16898536682128906, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.1457798331975937, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.09381776303052902, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.0843886062502861, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.10691626369953156, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.09831948578357697, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.09640353173017502, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.07750257104635239, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.06950964033603668, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.05509193614125252, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.04759696498513222, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.04555768519639969, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.04507779702544212, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.027736356481909752, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.02435440383851528, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.024251356720924377, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.021227028220891953, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.02091037668287754, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.01531940046697855, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.016120538115501404, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.014695985242724419, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.01210711244493723, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.34.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.18864242732524872, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.17790661752223969, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.17457212507724762, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.15921856462955475, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.08865255117416382, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.08485487103462219, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.09818723052740097, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.09089924395084381, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.089806467294693, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.08035044372081757, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.07651387155056, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.04998539760708809, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.04351305961608887, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.0424954853951931, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.042257025837898254, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.02497876062989235, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.021956434473395348, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.021887212991714478, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.020281516015529633, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.020132320001721382, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.013161031529307365, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.013386567123234272, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.012835594825446606, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.009075229056179523, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.34.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.23440052568912506, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.22115856409072876, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.21706926822662354, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.19812516868114471, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.1099555715918541, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.10529837012290955, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.12167859822511673, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.11267875880002975, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.11137842386960983, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.09971596300601959, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.0949220061302185, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.06181064993143082, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.05377088487148285, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.052546512335538864, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.05225827917456627, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.03083253651857376, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.026735471561551094, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.026650458574295044, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.024611566215753555, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.02442624792456627, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.015955347567796707, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.015660423785448074, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.015542814508080482, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.00979018397629261, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.34.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.23503312468528748, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.21003581583499908, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.20001287758350372, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.17821142077445984, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.10735489428043365, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.09765466302633286, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.12691518664360046, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.11669265478849411, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.1111217588186264, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.09293638914823532, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.08748066425323486, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.06447161734104156, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.05589932203292847, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.05168818682432175, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.050656359642744064, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.03254197910428047, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.027068328112363815, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.026823468506336212, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.024238819256424904, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.02357437089085579, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.0177654717117548, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.017507169395685196, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.016470076516270638, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.012135193683207035, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.35.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.12424622476100922, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.11683055013418198, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.1138274297118187, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.10360975563526154, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.05802105367183685, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.05509654060006142, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.06597056239843369, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.06078691408038139, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.05881889909505844, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.0524689182639122, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.05014495924115181, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.033472925424575806, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.029048560187220573, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.027792073786258698, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.02749621868133545, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.016774412244558334, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.014266196638345718, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.014143208041787148, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.013134991750121117, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.0129451435059309, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.008784651756286621, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.008656336925923824, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.00836777500808239, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.0056103370152413845, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.35.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.10865383595228195, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.10211554914712906, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.09932854771614075, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.09040942788124084, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.05073633790016174, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.04807986319065094, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.0577910952270031, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.05340185761451721, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.05143824964761734, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.0458710752427578, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.04378553479909897, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.029308127239346504, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.025482213124632835, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.024278782308101654, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.02398890256881714, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.014641763642430305, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.012423770502209663, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.012302590534090996, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.01143067330121994, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.011251126416027546, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.0076157934963703156, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.007511558942496777, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.007227304857224226, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.00479096919298172, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.35.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.2333589792251587, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.21970587968826294, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.21519896388053894, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.1961684226989746, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.10940653085708618, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.10445798933506012, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.12224283814430237, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.11273548752069473, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.11086687445640564, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.0989789143204689, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.09427646547555923, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06206585094332695, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05378535017371178, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.0522504486143589, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.05189904943108559, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.030946113169193268, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.026535270735621452, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.026417508721351624, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.02437683381140232, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.024142833426594734, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.015911294147372246, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.015528557822108269, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.015337936580181122, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.00956692360341549, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.35.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.18710461258888245, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.16315717995166779, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.1560995727777481, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.12624479830265045, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.08554160594940186, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.0767943412065506, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.09914176166057587, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.0897877961397171, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.08792694658041, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.06790416687726974, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.06044235825538635, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.05112569406628609, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.044326577335596085, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.04242490977048874, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.04199245572090149, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.026732943952083588, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.023982826620340347, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.023876002058386803, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.020868860185146332, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.020601646974682808, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.016191834583878517, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.017317039892077446, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.0156397745013237, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.014351461082696915, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.35.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.19381895661354065, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.18265768885612488, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.1792411208152771, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.16350345313549042, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.09104155749082565, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.08713623136281967, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.10084519535303116, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.09334558248519897, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.09223881363868713, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.08251304179430008, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.07855287939310074, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.05135964974761009, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.044675566256046295, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.04363032430410385, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.04338211193680763, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.02567610889673233, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.022479737177491188, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.022412963211536407, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.02075212262570858, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.020593686029314995, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.013512698002159595, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.013617467135190964, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.013172636739909649, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.009123114868998528, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.35.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.23761659860610962, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.22411124408245087, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.22003218531608582, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.20082958042621613, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.11157478392124176, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.10680905729532242, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.12359549850225449, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.11431245505809784, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.11300273984670639, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.10114733874797821, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.09639519453048706, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.06291159242391586, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.054622892290353775, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.05336699262261391, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.0530741885304451, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.031448133289813995, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.027248460799455643, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.027166474610567093, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.025104843080043793, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.024918407201766968, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.016463514417409897, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.016115013509988785, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.01605387218296528, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010287826880812645, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.35.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.23522719740867615, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.21072958409786224, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.20086824893951416, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.1792737990617752, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.10752921551465988, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.09790154546499252, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.12653520703315735, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.11675526946783066, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.1111350730061531, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.093277208507061, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.08791979402303696, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.0643724873661995, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.05580951273441315, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.05164209380745888, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.050626497715711594, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.03227524086833, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.026805760338902473, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.02656155824661255, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.023967107757925987, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.02331143245100975, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.01722836308181286, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.017012186348438263, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.015909867361187935, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.011404850520193577, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.36.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.12661392986774445, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.11901083588600159, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.11600793898105621, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.10553759336471558, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.059180308133363724, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.056210216134786606, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.06697934120893478, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.06191663816571236, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.05999702587723732, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.0534825399518013, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.051023777574300766, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.03397272899746895, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.029564863070845604, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.028332136571407318, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.028038090094923973, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.016985446214675903, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.014522372744977474, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.01439726259559393, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.013357431627810001, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.013170548714697361, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.008865896612405777, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.008771343156695366, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.008462793193757534, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.0056506372056901455, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.36.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.11009392887353897, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.10345547646284103, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.10064108669757843, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.09157107025384903, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.0514243021607399, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.04871904477477074, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.0587010495364666, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.05418240651488304, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.0521220937371254, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.04649714380502701, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.04439910128712654, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.029758576303720474, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.025866102427244186, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.02461405098438263, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.024316877126693726, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.014869732782244682, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.012594826519489288, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.012464653700590134, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.011587250046432018, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.011393778957426548, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.007730943616479635, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.007611704058945179, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.00733130844309926, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.0048463535495102406, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.36.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.2373480498790741, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.22354374825954437, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.21895647048950195, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.19958922266960144, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.11136237531900406, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.10634177178144455, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.12427511066198349, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.11470238119363785, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.11286809295415878, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.10076817125082016, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.09593559801578522, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06308673322200775, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05472754314541817, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.05320184305310249, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.05284148082137108, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.03146618977189064, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.027016328647732735, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.026897281408309937, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.024804936721920967, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.024571482092142105, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.016160625964403152, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.015793439000844955, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.015593261457979679, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.00971555057913065, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.36.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.20986634492874146, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.18323642015457153, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.17370805144309998, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.14736559987068176, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.09723813831806183, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.08677606284618378, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.11380025744438171, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.10474256426095963, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.10051678121089935, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.07902132719755173, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.07321295142173767, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.05858010798692703, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.0504232682287693, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.04698844999074936, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.04615011811256409, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.029386065900325775, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.024745702743530273, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.0244663767516613, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.021190427243709564, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.0206252783536911, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.015919627621769905, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.016118254512548447, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.014848089776933193, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.011422434821724892, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.36.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.19860216975212097, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.18721142411231995, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.1837678849697113, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.1675901561975479, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.09337098896503448, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.08934929966926575, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.1035463735461235, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.09573234617710114, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.09458044171333313, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.0845809131860733, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.08065283298492432, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.05274517834186554, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.04581940174102783, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.04474988207221031, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.044491250067949295, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.026376083493232727, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.023092351853847504, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.023022886365652084, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.021319052204489708, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.021160725504159927, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.013943852856755257, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.01403361652046442, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.01359319593757391, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.009460031986236572, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.36.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.23910276591777802, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.22538252174854279, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.22128866612911224, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.2019505798816681, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.11229784786701202, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.10747433453798294, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.1245039626955986, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.1151031032204628, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.11373617500066757, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.10177623480558395, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.09701408445835114, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.06334250420331955, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.05497639253735542, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.053712718188762665, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.05341498181223869, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.031642064452171326, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.027415959164500237, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.0273307915776968, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.025249069556593895, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.025063730776309967, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.016584787517786026, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.016200656071305275, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.016170334070920944, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010329283773899078, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.36.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.2377750724554062, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.21313181519508362, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.20326170325279236, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.18119171261787415, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.10868769884109497, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.09906011819839478, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.12795013189315796, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.11789373308420181, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.11227221041917801, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.0942329540848732, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.08876501768827438, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.06486082822084427, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.05626150220632553, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.05212153121829033, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.051115717738866806, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.03242437541484833, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.026919037103652954, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.026673924177885056, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.024021906778216362, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.023365026339888573, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.017094479873776436, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.01689102128148079, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.015747226774692535, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.011092441156506538, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.37.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.12615716457366943, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.11860910803079605, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.11553235352039337, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.10513857752084732, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.05902232974767685, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.056001078337430954, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.06712076812982559, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.06184864044189453, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.0598275326192379, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.05332689359784126, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.05096885561943054, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.03403966873884201, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.029544083401560783, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.028255390003323555, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.02794506587088108, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.01702246442437172, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.014464214444160461, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.01433824747800827, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.013300501741468906, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.013105321675539017, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.008865663781762123, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.008718229830265045, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.008445781655609608, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005574364680796862, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.37.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.1114766076207161, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.1047535091638565, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.10188122093677521, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.09272043406963348, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.052062153816223145, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.049314387142658234, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.0596652515232563, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.05485939234495163, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.052788104861974716, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.04704935848712921, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.04505979269742966, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.030246829614043236, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.026190368458628654, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.02494608610868454, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.024642491713166237, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.015122042037546635, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.012777553871273994, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.01264928001910448, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.011757921427488327, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.011565948836505413, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.007893014699220657, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.007735537830740213, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.0074904984794557095, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.004950289614498615, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.37.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.24053743481636047, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.22655008733272552, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.2219729721546173, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.20212997496128082, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.11298305541276932, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.10787061601877213, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.12626247107982635, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.11633288860321045, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.11453769356012344, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.10217347741127014, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.09735727310180664, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06418291479349136, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05554598569869995, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.054017432034015656, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.05365362390875816, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.03200391307473183, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.027514565736055374, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.027402224019169807, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.025282537564635277, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.025054967030882835, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.016519496217370033, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.016214001923799515, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.015936683863401413, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.01017514057457447, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.37.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.21198534965515137, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.18143662810325623, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.17144474387168884, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.14552482962608337, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.09774485230445862, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.08648160099983215, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.11440640687942505, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.10461846739053726, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.10136398673057556, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.07745170593261719, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.07065095752477646, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.058899350464344025, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.0510757640004158, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.0479997843503952, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.04725686088204384, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.02982718124985695, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.02640152908861637, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.026207543909549713, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.022686081007122993, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.022219371050596237, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.016885895282030106, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.018488464877009392, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.015969431027770042, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.014635966159403324, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.37.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.20206449925899506, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.19039367139339447, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.18682198226451874, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.17040416598320007, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.09506044536828995, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.0908965915441513, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.10534846037626266, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.09748157113790512, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.09628506749868393, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.08604678511619568, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.08195434510707855, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.05368445813655853, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.046646878123283386, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.045559391379356384, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.04529835283756256, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.026823360472917557, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.023487716913223267, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.02341219037771225, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.021669218316674232, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.02150617353618145, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.014147216454148293, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.014250675216317177, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.01379422377794981, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.009567039087414742, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.37.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.24056239426136017, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.22686609625816345, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.22261735796928406, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.2030968964099884, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.11300690472126007, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.10814410448074341, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.12521280348300934, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.11584877967834473, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.11445491015911102, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.10234116017818451, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.09752988070249557, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.06362045556306839, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.05532572418451309, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.05404644459486008, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.053744956851005554, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.03177150711417198, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.027531171217560768, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.027443373575806618, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.02533268928527832, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.025137638673186302, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.016531651839613914, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.016193635761737823, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.016108209267258644, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010214577428996563, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.37.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.237544983625412, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.21339082717895508, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.203486368060112, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.18146701157093048, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.10885513573884964, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.09925014525651932, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.12825025618076324, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.11817789077758789, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.1123848706483841, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.09457632899284363, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.08908344060182571, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.0652695745229721, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.05661917105317116, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.05238354951143265, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.05133187770843506, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.03287101909518242, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.027338188141584396, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.027072405442595482, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.024522285908460617, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.023861432448029518, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.017631424590945244, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.017555637285113335, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.016246579587459564, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.012039048597216606, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.38.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.13685333728790283, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.12863221764564514, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.12550324201583862, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.11417204886674881, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.06407707929611206, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.06086970865726471, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.0721966102719307, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.06680889427661896, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.06497222185134888, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.057861100882291794, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.0550752654671669, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.036599982529878616, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.03189115226268768, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.030653666704893112, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.03035694919526577, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.018276330083608627, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.015642879530787468, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.01552313007414341, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.014360290952026844, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.014172183349728584, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.009462656453251839, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.009334366768598557, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.009053857997059822, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.00589259946718812, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.38.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.11958605796098709, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.11235761642456055, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.10944925993680954, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.09963856637477875, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.05601816624403, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.05312569439411163, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.06358659267425537, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.05863388255238533, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.05678112804889679, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.050581950694322586, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.048296257853507996, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.032281000167131424, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.02799047902226448, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.026814276352524757, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.02653069794178009, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.016131430864334106, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.013703247532248497, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.013585805892944336, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.012588641606271267, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.012411965057253838, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.00838466640561819, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.008223223499953747, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.007999283261597157, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005218348931521177, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.38.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.24088312685489655, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.2266789823770523, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.22220180928707123, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.20239944756031036, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.1130824089050293, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.1079949215054512, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.12612146139144897, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.11637318134307861, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.1146102324128151, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.1021982729434967, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.09735045582056046, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06405971199274063, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.055527471005916595, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.054045017808675766, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.05369357019662857, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.031956519931554794, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.027454879134893417, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.02733864262700081, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.025194132700562477, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.024968475103378296, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.016449298709630966, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.016047481447458267, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.015887534245848656, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.009893800131976604, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.38.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.21518658101558685, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.18997763097286224, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.1819530874490738, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.15427644550800323, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.09975460916757584, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.09045960009098053, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.11407583206892014, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.10523517429828644, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.10272999107837677, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.08154158294200897, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.07457749545574188, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.05849805101752281, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05047508701682091, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.04793402925133705, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.04731873422861099, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.029285605996847153, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.024825522676110268, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.0246693417429924, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.02118750847876072, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.020772771909832954, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.01560276374220848, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.015464945696294308, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.014789102599024773, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.010436736978590488, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.38.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.2059011608362198, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.19398242235183716, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.19028246402740479, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.17347882688045502, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.0968974232673645, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.09264522790908813, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.10750284790992737, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.09939541667699814, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.09818296134471893, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.08767178654670715, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.08344221115112305, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.054791003465652466, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.04757656529545784, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.04645279422402382, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.04618531093001366, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.027389027178287506, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.02392696589231491, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.023850055411458015, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.0220571868121624, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.021886682137846947, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.014461086131632328, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.014481811784207821, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.014093633741140366, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.009679812006652355, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.38.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.24304427206516266, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.22904957830905914, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.22474518418312073, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.2049124389886856, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.11419755220413208, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.10926037281751633, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.12650494277477264, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.11705995351076126, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.11570805311203003, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.1033705398440361, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.0984165295958519, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.06432744860649109, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.05590995028614998, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.05460814759135246, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.054301247000694275, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.032095931470394135, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.027797171846032143, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.027709096670150757, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.0255556832998991, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.025358298793435097, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.016666511073708534, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.016318853944540024, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.016236795112490654, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010233680717647076, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.38.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.24294757843017578, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.21880997717380524, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.2091950923204422, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.18630169332027435, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.11133020371198654, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.10190057754516602, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.13042223453521729, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.12031473964452744, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.11480772495269775, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.09679604321718216, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.0909922868013382, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.06616164743900299, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.05739651992917061, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.05334820598363876, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.05236145853996277, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.033112432807683945, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.027433225885033607, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.02718493714928627, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.02449188381433487, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.023850835859775543, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.017433183267712593, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.017014464363455772, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.016109850257635117, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.01095658354461193, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.39.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.13599522411823273, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.12770198285579681, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.12445306032896042, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.11310657858848572, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.0637006163597107, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.060397919267416, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.0720972791314125, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.06664752960205078, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.06459293514490128, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.05745638534426689, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.05470450595021248, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.03661566972732544, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.03187521919608116, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.030521197244524956, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.030194764956831932, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.018325088545680046, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.015652472153306007, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.015521771274507046, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.014376584440469742, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.014169899746775627, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.009569250047206879, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.009473765268921852, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.009127706289291382, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.006124973297119141, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.39.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.11869370192289352, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.11148900538682938, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.10849283635616302, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.09863336384296417, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.05556719750165939, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.05262453854084015, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.06333210319280624, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.058402083814144135, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.05638105422258377, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.05013640597462654, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.047835372388362885, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.032120123505592346, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.02787625789642334, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.026603329926729202, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.026294304057955742, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.01606074534356594, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.013604189269244671, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.013475336134433746, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.012490440160036087, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.01229630783200264, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.008345319889485836, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.008194636553525925, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.007922817952930927, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005209076683968306, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.39.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.2412777692079544, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.2269199937582016, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.2222413271665573, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.20227529108524323, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.11330306529998779, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.10805012285709381, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.12653495371341705, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.11668159067630768, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.11481677740812302, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.1022186279296875, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.09732790291309357, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06422512978315353, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.055664680898189545, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.05410846695303917, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.05373398959636688, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.03203248977661133, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.02745663747191429, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.027336571365594864, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.02515718899667263, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.024923041462898254, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.016450699418783188, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.016016658395528793, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.01586179994046688, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.009812614880502224, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.39.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.2173972725868225, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.18275097012519836, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.17105655372142792, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.13811543583869934, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.09569142758846283, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.08138741552829742, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.11748750507831573, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.10523331165313721, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.10098709911108017, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.07528868317604065, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.06884060800075531, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.05926639959216118, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05201943963766098, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.04774793982505798, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.046707633882761, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.03083791397511959, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.02731645107269287, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.027087116613984108, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.02373996376991272, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.023115694522857666, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.018486661836504936, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.020263854414224625, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.01729605533182621, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.016751237213611603, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.39.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.21101099252700806, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.19874022901058197, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.19493311643600464, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.17762401700019836, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.09937097132205963, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.09500014036893845, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.1103377491235733, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.10193442553281784, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.10069171339273453, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.089856818318367, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.08552072942256927, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.05622973293066025, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.04883068427443504, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.04766830801963806, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.047395143657922745, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.028201675042510033, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.024606868624687195, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.024525096639990807, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.022685367614030838, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.022510865703225136, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.015047693625092506, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.014967674389481544, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.014673369936645031, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010097134858369827, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.39.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.24640637636184692, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.2321573942899704, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.22779378294944763, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.2076520174741745, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.11587817966938019, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.11083779484033585, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.12863469123840332, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.11885496973991394, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.11743032932281494, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.10483448952436447, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.09975462406873703, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.06540212035179138, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.056818414479494095, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.05547921359539032, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.05515660345554352, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.0327579602599144, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.02832813560962677, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.028243275359272957, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.026054637506604195, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.025853173807263374, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.01725984551012516, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.01676921918988228, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.016823800280690193, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.01071202289313078, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.39.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.24476900696754456, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.22084540128707886, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.2109919637441635, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.18796515464782715, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.11238297075033188, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.10285788029432297, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.13177336752414703, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.1217682734131813, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.1158178523182869, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.0979054868221283, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.09212277084589005, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.06694352626800537, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.0582655593752861, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.05398977920413017, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.05295518785715103, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.03353574872016907, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.02803792804479599, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.02775605209171772, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.025150692090392113, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.024485625326633453, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.017660101875662804, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.017779218032956123, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.01624883897602558, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.01193714328110218, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.40.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.14115497469902039, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.1327139288187027, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.12949995696544647, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.11775246262550354, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.06622061878442764, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.06289499998092651, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.07454949617385864, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.06899511069059372, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.0671485960483551, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.0597190223634243, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.05679338052868843, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.037798259407281876, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.03293956071138382, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.03166964650154114, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.03136780112981796, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.0188725758343935, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.016146881505846977, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.016026422381401062, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.014802207238972187, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.014608577825129032, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.009755824692547321, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.00959830079227686, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.009333597496151924, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.006005148869007826, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.40.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.12470054626464844, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.11715331673622131, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.11419962346553802, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.10386388003826141, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.05845062434673309, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.05549100041389465, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.06616110354661942, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.06111285090446472, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.0592753067612648, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.05275789648294449, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.05028950050473213, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.03359772264957428, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.02917291969060898, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.027984678745269775, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.027696112170815468, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.016784077510237694, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.014283612370491028, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.014169394038617611, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.013109974563121796, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.012925048358738422, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.008696090430021286, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.008539215661585331, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.008304476737976074, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.0053795999847352505, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.40.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.24436882138252258, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.22985489666461945, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.2252199649810791, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.20507153868675232, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.11487550288438797, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.10962190479040146, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.12806181609630585, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.1181887537240982, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.1164378821849823, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.10367617756128311, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.09859811514616013, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.0650317370891571, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05641263350844383, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.05489600822329521, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.05453865975141525, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.03243851661682129, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.02784649468958378, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.027738263830542564, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.025511356070637703, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.02527843415737152, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.01666068658232689, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.01621723733842373, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.01609162613749504, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.009916400536894798, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.40.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.21628880500793457, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.1833348274230957, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.1727139949798584, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.14962059259414673, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.09973349422216415, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.08722005784511566, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.11576021462678909, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.10652689635753632, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.10352058708667755, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.0782294049859047, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.07412295043468475, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.05958452820777893, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05147971585392952, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.048397619277238846, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.047666482627391815, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.02998802624642849, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.025714049115777016, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.02551913447678089, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.02155281975865364, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.021047910675406456, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.016496500000357628, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.016950011253356934, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.015534250065684319, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.012397089041769505, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.40.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.2153356373310089, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.20268455147743225, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.19881859421730042, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.1811324805021286, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.10145437717437744, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.09693272411823273, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.11270155012607574, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.10408681631088257, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.10281529277563095, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.09167619794607162, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.08730703592300415, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.05750606581568718, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.0498509518802166, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.0486520454287529, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.04837248474359512, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.028786668553948402, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.025119159370660782, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.025036994367837906, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.02314426377415657, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.022963974624872208, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.015293760225176811, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.01528418343514204, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.01491397712379694, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010318556800484657, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.40.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.24949520826339722, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.2349240779876709, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.2304660528898239, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.20993800461292267, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.11736497282981873, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.1121760830283165, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.1299985945224762, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.12037990987300873, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.11892294138669968, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.10606773942708969, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.10086116939783096, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.06613998860120773, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.05750995874404907, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.05613968148827553, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.055816296488046646, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.0330270417034626, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.028590023517608643, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.028498761355876923, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.02625734731554985, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.02604624629020691, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.017151182517409325, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.016808558255434036, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.016699060797691345, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.01056898757815361, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.40.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.2492372691631317, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.22473852336406708, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.21495108306407928, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.19130933284759521, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.11440862715244293, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.10485377907752991, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.134351909160614, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.12366777658462524, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.11793271452188492, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.09964299947023392, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.09363503009080887, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.0683235451579094, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.0591752789914608, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.054964520037174225, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.05394066870212555, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.034418292343616486, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.028530437499284744, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.028267499059438705, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.02558758109807968, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.024928811937570572, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.018513686954975128, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.01806194707751274, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.017151430249214172, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.012141073122620583, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.41.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.1462976336479187, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.13743135333061218, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.1341102570295334, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.12193670123815536, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.06864845007658005, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.06521099805831909, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.0774737223982811, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.0715479627251625, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.0696236789226532, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.06192038580775261, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.058882035315036774, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.03931731730699539, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.03416986018419266, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.03286033868789673, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.032547034323215485, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.019656067714095116, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.016790084540843964, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.016668817028403282, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.015396416187286377, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.015201328322291374, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.010219421237707138, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.010033149272203445, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.009766454808413982, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.006367208436131477, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.41.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.12825219333171844, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.12050565332174301, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.11746787279844284, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.10674488544464111, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.060180582106113434, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.05711299180984497, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.06823161989450455, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.06292106956243515, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.06103081628680229, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.054288312792778015, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.051781244575977325, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.03465578332543373, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.03007076494395733, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.028812658041715622, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.02852482721209526, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.017322836443781853, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.014728852547705173, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.014602323062717915, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.013510377146303654, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.01332009769976139, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.008995755575597286, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.008820456452667713, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.008584355004131794, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.00558044295758009, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.41.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.247389554977417, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.23254849016666412, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.22788631916046143, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.20729835331439972, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.11629791557788849, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.11096736043691635, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.12959708273410797, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.11962523311376572, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.11790639162063599, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.10487888753414154, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.09973486512899399, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06583333760499954, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.057096533477306366, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.055561747401952744, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.05520697310566902, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.03283118084073067, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.02823101356625557, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.02811376005411148, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.025862611830234528, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.025627467781305313, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.016889328137040138, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.016491789370775223, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.016320517286658287, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.010156107135117054, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.41.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.22361992299556732, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.19578731060028076, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.18679459393024445, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.15534383058547974, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.10316567122936249, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.0932065024971962, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.11936594545841217, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.11008070409297943, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.1072908267378807, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.08398688584566116, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.07507334649562836, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06122243031859398, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.0529220849275589, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.04977380484342575, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.04900409281253815, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.030615825206041336, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.026028236374258995, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.025840844959020615, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.02222721464931965, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.021702906116843224, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.016343634575605392, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.01661767065525055, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.015330064110457897, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.011555124074220657, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.41.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.21409042179584503, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.20144057273864746, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.19751805067062378, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.17980057001113892, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.10085522383451462, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.09635516256093979, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.11191708594560623, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.10353420674800873, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.102240189909935, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.09106302261352539, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.08661960810422897, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.057050831615924835, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.04956366494297981, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.04835023730993271, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.048067159950733185, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.028529254719614983, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.024893920868635178, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.02481522038578987, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.022900141775608063, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.02271757461130619, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.015024663880467415, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.01505138911306858, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.014629583805799484, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010032332502305508, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.41.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.25116437673568726, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.23637506365776062, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.23186461627483368, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.21111725270748138, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.1182439848780632, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.11297930777072906, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.1312241554260254, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.1213161051273346, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.11982449144124985, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.10678684711456299, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.10150798410177231, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.06692077219486237, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.05799863487482071, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.05659785121679306, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.056270986795425415, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.033413004130125046, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.028897183015942574, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.02880173735320568, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.026535172015428543, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.026325808838009834, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.0175077673047781, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.017105266451835632, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.017042912542819977, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010908192954957485, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.41.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.2536259889602661, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.22905899584293365, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.21902379393577576, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.1950138509273529, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.11660069972276688, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.10686775296926498, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.13627634942531586, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.12615841627120972, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.12009188532829285, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.10157173126935959, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.09539981931447983, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.06924156844615936, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.06032450869679451, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.05600586161017418, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.054960787296295166, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.03466806188225746, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.029038485139608383, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.02875451184809208, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.026039645075798035, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.025362998247146606, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.01829339936375618, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.018345149233937263, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.016912803053855896, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.012259812094271183, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.42.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.1428624987602234, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.13403107225894928, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.1306842863559723, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.1186973974108696, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.06705199182033539, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.06358931958675385, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.0755997747182846, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.07002150267362595, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.06798959523439407, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.06038584187626839, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.057320356369018555, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.038345251232385635, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.033426132053136826, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.03209071233868599, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.03177502378821373, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.019156213849782944, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.016383593901991844, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.016259243711829185, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.015009003691375256, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.014807517640292645, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.009935954585671425, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.009794478304684162, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.009497735649347305, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.0061903782188892365, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.42.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.12576191127300262, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.11797017604112625, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.11492854356765747, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.10438728332519531, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.0589936338365078, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.05590064823627472, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.0668642446398735, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.06175166368484497, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.0598391555249691, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.053152333945035934, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.05059080198407173, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.033968485891819, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.029514703899621964, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.028254559263586998, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.027961742132902145, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.01697355881333351, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.014454830437898636, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.014333106577396393, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.013250994496047497, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.013058786280453205, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.008835840970277786, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.008690748363733292, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.00842929445207119, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005541959311813116, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.42.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.24901223182678223, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.2340264767408371, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.2291930615901947, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.2084091454744339, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.11710893362760544, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.11172207444906235, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.13055865466594696, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.12055497616529465, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.11877420544624329, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.10555137693881989, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.10028955340385437, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06627063453197479, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.0575321726500988, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.05598410964012146, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.05561719834804535, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.03307076543569565, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.028403175994753838, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.028286276385188103, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.025995945557951927, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.02575894258916378, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.016963046044111252, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.01656084693968296, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.01638892851769924, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.010131334885954857, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.42.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.21124745905399323, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.17900095880031586, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.16620804369449615, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.14364442229270935, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.09762664139270782, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.08341658115386963, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.1175900399684906, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.10770011693239212, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.10102096945047379, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.07757819443941116, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.07144970446825027, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06033841148018837, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05170424282550812, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.04704933613538742, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.045909617096185684, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.030189475044608116, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.02455960400402546, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.024115191772580147, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.020609665662050247, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.019811494275927544, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.016088511794805527, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.015920689329504967, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.014531143940985203, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.010800071060657501, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.42.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.21534034609794617, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.20248647034168243, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.1984880119562149, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.18069736659526825, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.10152330249547958, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.09693074971437454, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.11264299601316452, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.1042286604642868, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.10292314738035202, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.09159688651561737, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.08709380775690079, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.057474516332149506, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.049941286444664, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.0487133152782917, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.048424653708934784, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.028746347874403, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.025153523311018944, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.025068439543247223, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.02313792146742344, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.0229521282017231, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.01520478818565607, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.015306934714317322, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.014808913692831993, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.01032312959432602, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.42.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.2534840404987335, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.2384083867073059, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.23378634452819824, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.21280433237552643, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.11935317516326904, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.11396384239196777, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.13227009773254395, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.12246640771627426, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.12098845094442368, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.10767724364995956, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.10232432186603546, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.06732054799795151, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.05853041261434555, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.05709976330399513, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.05676408112049103, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.033597249537706375, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.029080312699079514, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.02898634411394596, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.0266701839864254, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.026453370228409767, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.017445245757699013, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.01710723526775837, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.016976479440927505, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010755032300949097, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.42.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.2597335875034332, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.23481763899326324, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.22478628158569336, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.20016732811927795, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.11955110728740692, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.109717458486557, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.14046280086040497, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.128937765955925, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.12310625612735748, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.10418562591075897, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.09784942865371704, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.07108710706233978, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.0617116317152977, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.05747109651565552, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.05643289536237717, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.03573423624038696, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.029846549034118652, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.02958149090409279, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.02678825519979, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.026125291362404823, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.019064953550696373, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.018896393477916718, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.017676152288913727, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.012744187377393246, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.43.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.16072483360767365, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.1506275236606598, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.1467612236738205, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.13304586708545685, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.07550223171710968, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.0715220645070076, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.08560995012521744, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.07886351644992828, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.07659487426280975, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.06787143647670746, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.06467381864786148, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.04359789565205574, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.037760406732559204, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.036207716912031174, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.03584932163357735, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.021793073043227196, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.018650230020284653, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.018507158383727074, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.017105914652347565, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.01687726378440857, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.011433150619268417, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.011388428509235382, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.01093168742954731, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.007525027729570866, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.43.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.13419674336910248, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.12572899460792542, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.1220025047659874, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.11054279655218124, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.06287871301174164, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.05930182710289955, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.07205040007829666, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.06645345687866211, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.0637994185090065, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.05649522691965103, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.05382542312145233, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.03657972812652588, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.031771961599588394, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.030153078958392143, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.029757728800177574, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.018297743052244186, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.015523112379014492, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.01535444799810648, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.014231945388019085, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.0139891617000103, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.009594300761818886, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.009547009132802486, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.009073076769709587, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.006261642090976238, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.43.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.25208571553230286, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.2367069274187088, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.23168158531188965, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.21038375794887543, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.11861524730920792, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.11294016242027283, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.13272134959697723, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.12229207903146744, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.12026415765285492, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.1067286804318428, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.10138426721096039, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06747224181890488, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.058391839265823364, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.05668562278151512, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.056286562234163284, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.03363734111189842, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.02881106548011303, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.02868219092488289, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.026339882984757423, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.026083335280418396, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.017344733700156212, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.01689511351287365, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.016709905117750168, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.010422377847135067, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.43.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.10004469752311707, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.09203530102968216, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.0896744653582573, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.07916554808616638, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.047179121524095535, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.04432154446840286, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.053065378218889236, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.04871094226837158, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.04799330607056618, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.04090290516614914, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.038102708756923676, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.02745746448636055, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.02388167381286621, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.023178331553936005, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.02301456406712532, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.01395408995449543, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.012782701291143894, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.012736319564282894, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.011622765101492405, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.011519686318933964, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.008026977069675922, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.008870957419276237, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.007823572494089603, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.007135128136724234, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.43.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.1937371790409088, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.1820274144411087, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.17842260003089905, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.16223542392253876, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.09142961353063583, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.0872395783662796, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.1016504243016243, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.09392445534467697, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.09271592646837234, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.08239177614450455, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.07835058122873306, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.05192553624510765, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.04503660649061203, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.043905265629291534, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.04363942891359329, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.025931181386113167, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.02277448959648609, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.022700976580381393, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.02095784805715084, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.020793085917830467, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.0138021782040596, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.014034051448106766, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.013434403575956821, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.009659245610237122, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.43.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.23900403082370758, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.2246108502149582, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.2202189713716507, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.20033001899719238, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.11259239166975021, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.10744636505842209, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.12491782009601593, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.11560595780611038, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.11414185911417007, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.10145345330238342, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.09639973193407059, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.06360723823308945, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.05520598217844963, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.05383536219596863, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.053512103855609894, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.03167656436562538, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.02743762917816639, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.027346789836883545, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.0251433327794075, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.024935651570558548, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.01645401306450367, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.01616634428501129, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.01599758490920067, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010182014666497707, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.43.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.2212579995393753, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.19715243577957153, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.18686389923095703, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.16648277640342712, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.10122635215520859, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.0915447473526001, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.12122607231140137, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.11117663979530334, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.10482430458068848, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.0873488262295723, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.08265667408704758, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.0617600753903389, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.053343214094638824, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.048886802047491074, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.04779795557260513, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.031009837985038757, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.02590038999915123, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.025603627786040306, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.02322683110833168, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.022529086098074913, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.016918083652853966, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.01719960942864418, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.015515045262873173, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.01230512373149395, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.44.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.14728723466396332, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.13830536603927612, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.1349674016237259, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.12268397957086563, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.06921008229255676, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.06573664397001266, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.07784487307071686, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.07199552655220032, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.07019008696079254, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.06232370436191559, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.059278033673763275, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.03955131024122238, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.03439034894108772, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.033127229660749435, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.0328308641910553, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.019753122702240944, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.01693907007575035, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.01682179793715477, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.015531014651060104, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.015334367752075195, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.010274184867739677, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.010142709128558636, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.009854013100266457, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.006470527499914169, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.44.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.12802553176879883, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.1202017143368721, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.11718431860208511, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.10644876211881638, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.060094065964221954, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.057016558945178986, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.06808507442474365, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.06279455125331879, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.06093715876340866, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.05411549285054207, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.05159826576709747, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.03458873927593231, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.029973307624459267, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.028759365901350975, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.028474241495132446, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.017263641580939293, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.01469394937157631, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.014577859081327915, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.013471368700265884, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.013285602442920208, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.008971743285655975, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.008806666359305382, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.008565610274672508, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005580926313996315, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.44.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.24693147838115692, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.23209716379642487, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.2273627370595932, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.20681674778461456, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.11618904024362564, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.1108095720410347, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.12959393858909607, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.11957362294197083, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.11778969317674637, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.10475628823041916, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.0996212363243103, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06587646156549454, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05707555264234543, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.05553266033530235, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.055159490555524826, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.03284426033496857, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.028212927281856537, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.02810295671224594, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.02583353780210018, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.025596285238862038, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.01693427376449108, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.01651056483387947, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.016368478536605835, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.010181446559727192, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.44.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.11306829005479813, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.10354211926460266, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.100409597158432, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.08814362436532974, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.05335249379277229, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.049722351133823395, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.060725897550582886, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.05569067969918251, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.05456232279539108, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.04624020680785179, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.04249132424592972, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.031353287398815155, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.02706407941877842, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.025995450094342232, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.025741225108504295, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.015770448371767998, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.013994493521749973, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.013925318606197834, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.012632723897695541, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.01247803308069706, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.008768840692937374, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.00936767365783453, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.008438791148364544, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.007157088257372379, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.44.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.1795869618654251, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.16889236867427826, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.16559484601020813, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.1506589949131012, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.0847216323018074, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.08089268952608109, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.09426967054605484, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.08700624108314514, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.08588890731334686, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.07644768059253693, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.07276060432195663, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.04818073287606239, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.0417313352227211, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.04069032520055771, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.0404508151113987, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.024056974798440933, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.02110910974442959, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.021042395383119583, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.019447727128863335, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.01929567940533161, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.012827351689338684, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.013013597577810287, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.012493968941271305, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.008961967192590237, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.44.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.23431357741355896, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.2204555869102478, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.2161778211593628, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.19683973491191864, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.11036275327205658, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.10540316253900528, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.12279260903596878, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.11325369030237198, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.11187078803777695, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.09960146248340607, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.09483352303504944, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.06252534687519073, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.05412750318646431, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.05280511826276779, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.052486956119537354, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.031230950728058815, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.026981616392731667, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.026895467191934586, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.024758487939834595, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.02456057444214821, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.016449885442852974, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.01599772460758686, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.016023993492126465, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.01024429127573967, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.44.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.22414138913154602, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.20077434182167053, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.19069916009902954, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.17073273658752441, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.10266628861427307, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.09312798827886581, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.12267604470252991, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.11258410662412643, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.10622821748256683, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.08924789726734161, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.0845760628581047, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.062291648238897324, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.05402064323425293, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.0495266318321228, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.04841550067067146, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.03119778074324131, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.02611692063510418, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.025805843994021416, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.023507140576839447, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.022807134315371513, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.01677320897579193, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.01718895137310028, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.015345519408583641, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.012131325900554657, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.45.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.12836292386054993, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.12044693529605865, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.117462657392025, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.10667306184768677, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.06024215370416641, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.057140860706567764, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.06786343455314636, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.06279754638671875, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.06110145151615143, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.05421248823404312, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.05157896876335144, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.03449558466672897, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.030020155012607574, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.02884066477417946, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.02856171503663063, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.017228510230779648, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.014788256026804447, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.014681318774819374, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.013558306731283665, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.013378303498029709, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.0089865168556571, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.008927828632295132, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.0086011728271842, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005766726564615965, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.45.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.10944308340549469, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.10263891518115997, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.09990566223859787, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.09070475399494171, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.051278289407491684, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.04852873831987381, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.05842738226056099, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.05388828366994858, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.05203552916646004, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.046182192862033844, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.04401545971632004, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.029661133885383606, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.02574574388563633, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.0245684627443552, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.02428063564002514, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.014823023229837418, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.012583649717271328, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.01246426347643137, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.011539850383996964, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.0113622872158885, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.007721941452473402, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.007615143898874521, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.0073364549316465855, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.004876985680311918, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.45.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.24389345943927765, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.22921821475028992, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.22437961399555206, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.2041703760623932, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.11469554901123047, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.10929714888334274, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.1279313564300537, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.11809124797582626, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.11626346409320831, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.10333621501922607, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.09828199446201324, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06503961980342865, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05635563284158707, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.0548093244433403, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.054431818425655365, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.03241211175918579, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.027844073250889778, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.027724847197532654, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.025486795231699944, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.025255171582102776, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.016685480251908302, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.016301734372973442, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.016113990917801857, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.010057015344500542, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.45.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.15637560188770294, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.13790413737297058, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.13158714771270752, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.10892447829246521, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.07263202965259552, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.06584063172340393, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.08410084247589111, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.07730182260274887, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.0751623585820198, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.05889943242073059, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.05245715007185936, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.04347064346075058, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.03757340461015701, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.03546552732586861, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.034949757158756256, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.021917667239904404, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.019187649711966515, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.01905033178627491, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.01660255528986454, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.01627819798886776, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.012210553511977196, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.013052865862846375, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.011571734212338924, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.010016310028731823, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.45.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.17886891961097717, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.16826274991035461, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.16499680280685425, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.15023143589496613, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.0843464806675911, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.08055146783590317, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.09370297193527222, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.08660861849784851, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.08548470586538315, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.07616134732961655, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.07253420352935791, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.04785744845867157, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.04150707647204399, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.04048634320497513, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.04024519398808479, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.02388681285083294, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.020978296175599098, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.020911863073706627, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.019332118332386017, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.01918061450123787, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.012707156129181385, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.01290135271847248, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.012377574108541012, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.008844812400639057, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.45.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.23145067691802979, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.21785423159599304, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.2136445790529251, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.19457262754440308, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.10896578431129456, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.10407724231481552, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.12089310586452484, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.11182630062103271, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.1104561910033226, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.09838428348302841, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.09363078325986862, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.06157656013965607, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.053405746817588806, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.052111104130744934, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.05180203169584274, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.030684277415275574, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.026568055152893066, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.026479659602046013, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.02438213862478733, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.024184413254261017, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.015992581844329834, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.015665307641029358, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.015563375316560268, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.009899580851197243, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.45.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.22177231311798096, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.19896145164966583, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.18916751444339752, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.1693977564573288, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.10163242369890213, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.09235169738531113, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.12109608948230743, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.11123115569353104, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.10512615740299225, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.08844053000211716, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.08375684916973114, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.06157498061656952, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.05336298421025276, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.048983294516801834, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.047918450087308884, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.031088169664144516, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.02579599618911743, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.025507157668471336, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.023226415738463402, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.02255021035671234, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.01711134798824787, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.016913551837205887, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.01578492671251297, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.011886007152497768, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.46.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.12176594138145447, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.11433465033769608, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.11150017380714417, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.10136711597442627, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.05709715932607651, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.05419648438692093, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.06471916288137436, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.059583015739917755, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.05791284888982773, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.05148854851722717, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.04909983277320862, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.032866477966308594, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.028476323932409286, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.027353474870324135, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.027082236483693123, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.01645779423415661, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.014019926078617573, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.013915559276938438, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.01287519559264183, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.01270141452550888, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.008615514263510704, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.008459261618554592, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.008235790766775608, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005449818912893534, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.46.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.10580527782440186, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.09933310002088547, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.09678814560174942, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.08797728270292282, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.049617644399404526, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.04703495651483536, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.05628805235028267, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.05193526670336723, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.0503048375248909, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.0447375513613224, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.04265064746141434, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.02859015204012394, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.024813439697027206, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.023749573156237602, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.02349015325307846, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.014276616275310516, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.012146883644163609, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.012046358548104763, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.011149218305945396, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.010987510904669762, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.007420942187309265, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.007311509922146797, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.007071193307638168, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.004656517878174782, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.46.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.23916377127170563, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.22489462792873383, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.22030343115329742, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.20050013065338135, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.11245279014110565, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.10724721103906631, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.12545371055603027, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.11580342054367065, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.11400829255580902, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.10148505121469498, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.0965069830417633, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.0637524202466011, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.055283281952142715, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.053749486804008484, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.05338944122195244, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.03179183974862099, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.027296362444758415, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.027181748300790787, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.02500913292169571, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.024773400276899338, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.016369450837373734, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.015966562554240227, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.015803761780261993, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.009813336655497551, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.46.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.1626652330160141, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.1391742080450058, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.13239701092243195, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.10102622956037521, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.07395205646753311, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.0658494383096695, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.08640976995229721, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.07810075581073761, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.07634880393743515, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.05629542097449303, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.05009094998240471, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.044305261224508286, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.038628753274679184, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.03676798194646835, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.03631211444735527, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.023145319893956184, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.020994151011109352, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.020884163677692413, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.017987918108701706, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.01769905537366867, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.014146337285637856, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.015418002381920815, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.013629072345793247, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.01290189754217863, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.46.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.18365399539470673, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.17292599380016327, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.1695881187915802, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.15442337095737457, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.0866268202662468, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.08273554593324661, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.09623491764068604, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.08891890943050385, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.0878012403845787, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.07823963463306427, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.07454266399145126, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.04911497235298157, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.042590342462062836, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.04155636951327324, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.04130496829748154, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.024529924616217613, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.021471215412020683, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.02140122279524803, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.01977694034576416, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.019623171538114548, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.013017663732171059, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.01310249138623476, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.01268057245761156, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.008870945312082767, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.46.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.23421645164489746, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.22061394155025482, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.2163599580526352, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.19715456664562225, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.11030146479606628, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.10543181747198105, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.12252475321292877, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.11316774785518646, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.11178123205900192, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.09967502951622009, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.09494829177856445, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.06250157952308655, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.0541062168776989, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.052796438336372375, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.052490975707769394, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.031213372945785522, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.027002086862921715, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.026917453855276108, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.02482437714934349, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.02462686598300934, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.016449181362986565, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.016062473878264427, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.016034428030252457, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010338308289647102, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.46.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.22585457563400269, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.20314976572990417, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.19356493651866913, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.1734190732240677, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.10351795703172684, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.09432780742645264, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.12234044075012207, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.11282908171415329, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.10692999511957169, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.0902615487575531, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.08543973416090012, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.06223325431346893, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.053962308913469315, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.04974695295095444, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.048713330179452896, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.031169161200523376, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.025873888283967972, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.025599021464586258, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.023243477568030357, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.022580035030841827, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.016638249158859253, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.016531821340322495, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.015292290598154068, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.011146784760057926, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.47.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.12137853354215622, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.11399373412132263, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.1112508773803711, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.10115443170070648, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.056960124522447586, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.05409146845340729, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.06427261233329773, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.0593603141605854, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.05776466429233551, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.05135427415370941, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.04890948906540871, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.03264767676591873, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.02835468016564846, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.027266480028629303, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.027011210098862648, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.016311757266521454, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.013966316357254982, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.01386232953518629, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.012813899666070938, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.012651578523218632, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.008511623367667198, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.008407847955822945, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.008153890259563923, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.00540737621486187, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.47.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.10462653636932373, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.09830173850059509, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.09573985636234283, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.08702308684587479, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.049083609133958817, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.046534743160009384, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.05580934137105942, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.05144230276346207, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.04976585507392883, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.04427500069141388, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.04224330559372902, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.028345521539449692, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.024567311629652977, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.02349822223186493, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.023243116214871407, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.014151007868349552, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.012022028677165508, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.011917037889361382, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.011035316623747349, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.010873147286474705, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.007358322385698557, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.007236681412905455, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.007007377687841654, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.004605456255376339, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.47.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.24441374838352203, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.2298537939786911, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.22522002458572388, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.20500649511814117, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.11492419987916946, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.10966759920120239, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.12816442549228668, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.11826831102371216, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.11650815606117249, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.10372689366340637, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.0986918956041336, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.0651029422879219, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05643685907125473, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.05492176488041878, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.05456545576453209, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.03245846554636955, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.027889125049114227, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.02777557447552681, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.025554541498422623, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.025321805849671364, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.01670377515256405, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.01628858968615532, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.016137227416038513, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.010011398233473301, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.47.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.19752536714076996, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.16867266595363617, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.1579880565404892, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.13226506114006042, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.09086183458566666, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.07984208315610886, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.10827258229255676, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.09950241446495056, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.09481282532215118, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.0714581236243248, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.06615979224443436, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.055800240486860275, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.04799351468682289, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.04404556751251221, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.043082769960165024, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.028003636747598648, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.0233576949685812, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.023051975294947624, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.01961594633758068, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.018961498513817787, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.015249362215399742, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.015547818504273891, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.014012763276696205, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.011177397333085537, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.47.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.18868450820446014, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.17769046127796173, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.17425130307674408, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.15880955755710602, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.08898276835680008, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.08503053337335587, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.09891534596681595, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.09132545441389084, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.09018520265817642, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.08042246848344803, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.07663774490356445, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.05043262988328934, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.04372364655137062, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.042666126042604446, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.04241103306412697, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.025204461067914963, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.022038310766220093, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.02196386829018593, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.02029917761683464, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.02014152519404888, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.01336810551583767, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.013423498719930649, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.013027871958911419, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.009071877226233482, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.47.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.23568403720855713, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.2219247817993164, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.21772921085357666, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.19839727878570557, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.11096090078353882, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.10604789853096008, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.12322920560836792, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.1138315200805664, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.11244659870862961, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.10029429197311401, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.09553232043981552, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.06272005289793015, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.05438366159796715, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.05307985097169876, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.05277230963110924, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.03131164237856865, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.027106229215860367, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.027014754712581635, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.02490781992673874, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.024708913639187813, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.016422223299741745, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.01603969745337963, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.015994995832443237, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010216142050921917, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.47.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.23086859285831451, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.20737434923648834, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.19757398962974548, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.1768151819705963, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.10572844743728638, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.09629068523645401, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.12510554492473602, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.11515507102012634, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.10922407358884811, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.09197241812944412, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.0870286375284195, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.06343302875757217, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.054965805262327194, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.05072179436683655, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.049689341336488724, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.03168332204222679, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.026252975687384605, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.02598246932029724, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.023503419011831284, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.022837702184915543, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.016754593700170517, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.01658780314028263, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.015374986454844475, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.010960477404296398, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.48.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.11871285736560822, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.11153558641672134, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.10874836146831512, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.09890677034854889, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.05568919703364372, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.05286348611116409, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.06310763210058212, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.05811481550335884, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.056482668966054916, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.05022505298256874, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.04795457050204277, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.03207399696111679, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.027770213782787323, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.026669781655073166, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.026402635499835014, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.016022002324461937, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.013636105693876743, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.013536292128264904, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.012518245726823807, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.012351470068097115, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.008344209752976894, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.008189687505364418, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.007976719178259373, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.00522615248337388, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.48.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.1044081524014473, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.09809313714504242, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.09551914036273956, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.08692730963230133, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.04892171919345856, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.04638632386922836, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.055898282676935196, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.05128703638911247, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.04962112009525299, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.04414421319961548, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.042242631316185, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.02836153469979763, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.02450932189822197, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.02343878336250782, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.023185012862086296, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.01417174655944109, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.011996827088296413, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.011892760172486305, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.011018633842468262, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.010857640765607357, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.00740178395062685, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.007230229210108519, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.007052527274936438, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.004611661657691002, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.48.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.24315214157104492, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.22866806387901306, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.22409413754940033, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.20390845835208893, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.11441156268119812, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.10916902869939804, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.12762437760829926, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.11775264889001846, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.11601715534925461, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.10323797166347504, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.09825114905834198, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.0649365782737732, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05624022334814072, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.05470836162567139, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.054342739284038544, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.03238556906580925, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.027875859290361404, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.027762729674577713, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.025564389303326607, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.025327591225504875, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.01674443483352661, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.016429992392659187, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.016176896169781685, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.010297920554876328, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.48.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.19691509008407593, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.16050443053245544, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.14740683138370514, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.12471102178096771, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.0899379774928093, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.07475528120994568, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.10851754248142242, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.09892935305833817, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.09446938335895538, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.0682142898440361, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.061421461403369904, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.05597611144185066, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.048332542181015015, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.04417803883552551, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.043163035064935684, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.028254715725779533, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.02440766617655754, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.024121807888150215, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.020517442375421524, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.01986865885555744, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.015882093459367752, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.017392681911587715, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.014605150558054447, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.013705910183489323, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.48.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.1940058469772339, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.18263868987560272, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.17907778918743134, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.1632372885942459, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.09148858487606049, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.08742231875658035, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.10152491927146912, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.09388948231935501, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.09271913766860962, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.082681804895401, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.07872094959020615, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.051785700023174286, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.04495331645011902, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.04386050999164581, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.04360911250114441, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.025848880410194397, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.02264973148703575, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.022579116746783257, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.020871855318546295, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.020706072449684143, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.013662834651768208, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.013800867833197117, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.013308558613061905, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.009323102422058582, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.48.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.2374517321586609, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.22367340326309204, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.2194288969039917, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.19998633861541748, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.11183281242847443, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.10688042640686035, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.1240464299917221, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.11469006538391113, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.11330441385507584, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.10106943547725677, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.09619494527578354, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.06311293691396713, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.054779160767793655, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.05346573516726494, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.05315135791897774, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.03145988658070564, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.027256490662693977, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.02717329002916813, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.02504120022058487, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.02484176866710186, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.016393868252635002, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.0160740464925766, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.015966707840561867, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010159295052289963, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.48.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.2317664921283722, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.2086765021085739, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.19906853139400482, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.1779523342847824, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.10638362169265747, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.09710323810577393, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.12544897198677063, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.1156504899263382, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.10977170616388321, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.09272664040327072, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.08762867748737335, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.06393729895353317, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.05543595179915428, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.05122988671064377, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.050188448280096054, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.03216826543211937, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.026797421276569366, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.02652178891003132, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.02410564385354519, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.023446612060070038, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.01726258546113968, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.01729181408882141, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.015874508768320084, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.011929739266633987, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.49.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.12931294739246368, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.1214846670627594, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.11857405304908752, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.10785724222660065, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.060672368854284286, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.057665638625621796, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.0682031437754631, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.06309079378843307, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.061522871255874634, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.054692283272743225, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.052014224231243134, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.034614913165569305, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.03011692874133587, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.029023434966802597, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.028757957741618156, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.01726721227169037, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.014808867126703262, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.014709396287798882, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.013574285432696342, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.013409122824668884, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.008947021327912807, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.00881494302302599, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.008578460663557053, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.0055536022409796715, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.49.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.11210133135318756, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.10529793798923492, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.1027202159166336, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.09343715757131577, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.05260635167360306, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.049951501190662384, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.05957230180501938, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.05487946420907974, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.05332900583744049, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.04746433347463608, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.045286450535058975, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.030250973999500275, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.02621690183877945, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.025181930512189865, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.02493697963654995, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.015118053182959557, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.012860922142863274, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.01276496984064579, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.011812317185103893, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.011651703156530857, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.007847011089324951, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.007701834663748741, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.007501049432903528, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.004876113496720791, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.49.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.24663032591342926, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.23199217021465302, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.22733865678310394, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.20695506036281586, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.11598376929759979, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.11067381501197815, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.12918122112751007, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.11921410262584686, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.1175561398267746, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.10466574877500534, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.09957044571638107, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06569252163171768, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05692136660218239, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.055437490344047546, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.05507639795541763, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.03273975849151611, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.028151119127869606, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.02803809382021427, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.025796109810471535, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.02557436376810074, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.016868557780981064, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.0164471585303545, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.016328198835253716, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.010137975215911865, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.49.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.19753672182559967, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.17642302811145782, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.16785818338394165, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.1364973932504654, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.09234672784805298, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.08361046761274338, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.10770417749881744, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.09955711662769318, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.09441271424293518, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.07423631846904755, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.06737342476844788, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.05551132932305336, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.047891050577163696, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.04450257495045662, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.04367893189191818, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.027820181101560593, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.023259099572896957, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.02291572466492653, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.01969001814723015, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.019123848527669907, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.014862403273582458, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.014952796511352062, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.013754570856690407, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.010310331359505653, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.49.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.19916470348834991, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.18750838935375214, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.1838424801826477, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.16758319735527039, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.09394959360361099, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.08974006772041321, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.10429753363132477, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.09640830010175705, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.09519454091787338, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.08484713733196259, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.08080773055553436, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.053207479417324066, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.04616363346576691, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.04504740238189697, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.0447860024869442, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.02658701129257679, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.023258505389094353, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.02318231202661991, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.021417848765850067, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.021251952275633812, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.01408003456890583, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.014161174185574055, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.013720283284783363, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.009558090940117836, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.49.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.23956111073493958, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.22554542124271393, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.22124621272087097, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.20158863067626953, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.11275621503591537, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.10776466131210327, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.12506510317325592, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.1156507134437561, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.114260733127594, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.10185567289590836, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.09692732989788055, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.06361491978168488, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.05524445325136185, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.05391378700733185, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.053608305752277374, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.03170640394091606, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.027476422488689423, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.027387889102101326, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.025227220728993416, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.025029178708791733, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.016475973650813103, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.01617872528731823, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.016041748225688934, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010187744162976742, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.49.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.23867201805114746, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.2151564359664917, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.20584900677204132, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.18361254036426544, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.1095195859670639, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.10034924745559692, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.12821128964424133, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.11819107830524445, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.11297281086444855, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.09532386064529419, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.08974912017583847, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.06507360935211182, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.056375011801719666, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.05247058719396591, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.05151977390050888, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.03250754624605179, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.0269930399954319, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.026758890599012375, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.02413039840757847, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.023506304249167442, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.017124593257904053, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.016748543828725815, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.015850970521569252, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.010801691561937332, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.50.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.1284361034631729, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.12057171761989594, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.11755582690238953, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.10687209665775299, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.06027709320187569, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.057180628180503845, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.06807611137628555, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.06288275867700577, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.06112618371844292, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.05428919941186905, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.05168081447482109, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.03456827998161316, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.030044453218579292, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.02886338159441948, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.028581054881215096, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.017276640981435776, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.014776123687624931, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.014660933054983616, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.013549253344535828, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.013369579799473286, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.00901660043746233, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.008884760551154613, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.00861763209104538, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005690313875675201, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.50.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.11139682680368423, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.10457397252321243, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.10186457633972168, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.09263239800930023, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.052264440804719925, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.049524880945682526, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.05935632064938545, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.054692745208740234, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.053001657128334045, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.04710528254508972, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.04488410800695419, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.030126415193080902, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.026122266426682472, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.025014737620949745, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.02475018799304962, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.015045637264847755, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.012787090614438057, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.01267542876303196, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.011722601018846035, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.011555231176316738, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.0078140739351511, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.007676145993173122, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.007443130481988192, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.004868199583142996, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.50.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.2475215047597885, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.23265796899795532, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.22794438898563385, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.2073424905538559, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.11636348068714142, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.11095323413610458, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.12980233132839203, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.11976612359285355, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.11798061430454254, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.10489515960216522, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.09973864257335663, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.065938301384449, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05714874714612961, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.0555974505841732, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.05522478371858597, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.03287317231297493, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.028211910277605057, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.028094034641981125, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.025826355442404747, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.025591863319277763, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.016899798065423965, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.016458887606859207, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.016320016235113144, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.010087878443300724, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.50.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.17998792231082916, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.1406102478504181, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.12539544701576233, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.10378118604421616, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.07995123416185379, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.06387196481227875, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.10122328251600266, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.09230398386716843, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.08591622859239578, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.059059057384729385, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.05513507500290871, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.051256533712148666, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.045018136501312256, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.03944820538163185, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.038023415952920914, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.026053598150610924, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.022011766210198402, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.02160525880753994, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.01832687295973301, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.017410285770893097, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.014557050541043282, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.01615297794342041, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.012893900275230408, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.012694960460066795, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.50.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.20448100566864014, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.1924613118171692, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.18872183561325073, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.17191320657730103, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.0964779332280159, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.09217412769794464, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.1072249561548233, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.09904736280441284, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.09778700023889542, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.08714204281568527, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.08295021206140518, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.054712217301130295, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.047448404133319855, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.046289458870887756, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.046020470559597015, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.02738904394209385, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.02393261343240738, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.023855013772845268, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.02203858457505703, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.021864498034119606, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.014600493013858795, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.014604438096284866, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.014224380254745483, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.009895163588225842, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.50.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.2429388016462326, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.22869925200939178, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.224281445145607, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.20436428487300873, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.11438832432031631, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.10931681841611862, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.12718144059181213, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.11740437895059586, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.11593963205814362, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.10332760959863663, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.09836442023515701, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.0646904930472374, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.056122928857803345, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.05476924031972885, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.05444233492016792, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.032357197254896164, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.028009746223688126, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.0279155895113945, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.025726746767759323, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.025525366887450218, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.01707059144973755, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.016636980697512627, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.016632668673992157, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010686405003070831, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.50.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.23876851797103882, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.21562501788139343, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.2061385065317154, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.1839125007390976, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.10977555811405182, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.10057196021080017, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.12856827676296234, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.11875689774751663, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.11312054842710495, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.09570173919200897, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.09014784544706345, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.06538356095552444, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.056854262948036194, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.052756380289793015, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.0517510250210762, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.032732583582401276, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.027439335361123085, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.027176711708307266, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.024643512442708015, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.024002520367503166, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.017283285036683083, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.017455918714404106, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.015933165326714516, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.011807986535131931, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.51.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.1334954798221588, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.12540413439273834, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.1224551573395729, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.11134329438209534, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.06269298493862152, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.059594929218292236, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.07048056274652481, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.06513768434524536, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.06360819935798645, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.05650684982538223, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.053719617426395416, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.0357765257358551, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.031107764691114426, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.029990706592798233, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.029724247753620148, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.01785442978143692, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.015281636267900467, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.015183184295892715, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.014005439355969429, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.01383284479379654, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.009240471757948399, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.009069801308214664, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.008866557851433754, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005668891593813896, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.51.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.11720848083496094, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.1100645661354065, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.1074230894446373, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.09771514683961868, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.055038485676050186, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.05227713659405708, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.06227369233965874, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.057359084486961365, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.05581914633512497, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.04964863508939743, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.047289859503507614, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.031608350574970245, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.027394352480769157, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.026342028751969337, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.026090124621987343, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.015771253034472466, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.013443238101899624, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.013345486484467983, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.012327583506703377, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.01216796413064003, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.008185929618775845, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.008009739220142365, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.00783340260386467, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.0050422027707099915, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.51.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.25147706270217896, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.23639604449272156, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.2316715568304062, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.21084770560264587, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.11829507350921631, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.11288629472255707, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.13179080188274384, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.12163776904344559, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.11991191655397415, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.10669035464525223, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.10152499377727509, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06696918606758118, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05806666612625122, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.05652008205652237, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.056168366223573685, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.03338141739368439, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.028679465875029564, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.02857310324907303, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.02626442350447178, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.02603013627231121, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.01717170886695385, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.01672082208096981, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.01660723239183426, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.01023612730205059, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.51.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.2136126607656479, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.1755536049604416, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.16242918372154236, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.14251048862934113, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.09812051057815552, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.08322367072105408, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.11565487086772919, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.10639964789152145, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.10228660702705383, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.07570606470108032, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.07100790739059448, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.05931701138615608, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05116980895400047, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.047404929995536804, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.04649621993303299, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.02972300909459591, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.024807559326291084, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.02454206347465515, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.020377183333039284, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.01972576044499874, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.0160101018846035, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.016006121411919594, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.014823028817772865, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.011068122461438179, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.51.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.21006332337856293, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.1976737678050995, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.19381532073020935, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.17654405534267426, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.09910441935062408, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.09468408674001694, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.11014585196971893, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.10172966867685318, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.10043630003929138, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.08948343247175217, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.08525646477937698, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.05618951842188835, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.048719990998506546, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.04752983897924423, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.047243792563676834, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.0280900988727808, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.02454787865281105, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.024470863863825798, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.022601697593927383, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.02242892235517502, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.014898235909640789, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.014956516213715076, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.014519976451992989, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010103859938681126, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.51.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.24571606516838074, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.2312759906053543, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.22681649029254913, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.2065618336200714, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.11571603268384933, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.11054426431655884, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.1284017264842987, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.11876571923494339, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.1172792837023735, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.10444139689207077, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.0993969663977623, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.0653516873717308, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.05673310160636902, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.055356722325086594, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.05502631515264511, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.032582614570856094, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.02822040766477585, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.028123537078499794, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.025895124301314354, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.02568480186164379, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.0169659610837698, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.016637690365314484, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.016510995104908943, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.01051115058362484, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.51.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.2463829070329666, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.22259710729122162, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.2132047861814499, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.1898701936006546, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.11332304775714874, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.10406450182199478, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.13264520466327667, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.12201425433158875, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.11667239665985107, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.09863187372684479, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.09289660304784775, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.06748998165130615, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.058275606483221054, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.05434722825884819, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.05339280888438225, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.033940088003873825, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.028085945174098015, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.027852609753608704, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.025142991915345192, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.024525195360183716, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.018242353573441505, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.017570113763213158, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.016952894628047943, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.011591717600822449, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.52.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.13898152112960815, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.13053424656391144, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.12743112444877625, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.11582685261964798, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.06525775790214539, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.0620197094976902, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.07352960109710693, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.06783477216959, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.06618379056453705, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.05879674479365349, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.05595787987112999, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.037363868206739426, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.03239085152745247, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.031233640387654305, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.03095742128789425, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.018651971593499184, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.01594950258731842, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.01584850624203682, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.014622705988585949, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.014445716515183449, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.009696504101157188, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.009515051729977131, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.009296691045165062, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.006028219126164913, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.52.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.12122325599193573, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.11382625252008438, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.11103393137454987, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.10102514177560806, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.05694636330008507, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.054056890308856964, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.06447692960500717, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.05936608836054802, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.05776647478342056, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.051329076290130615, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.048951417207717896, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.0327664390206337, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.02836853638291359, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.027272561565041542, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.02701301872730255, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.016366807743906975, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.013929090462625027, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.013829778879880905, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.012772951275110245, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.012611795216798782, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.008513194508850574, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.008328056894242764, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.008146505802869797, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.0052724555134773254, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.52.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.252704381942749, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.23749037086963654, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.2327251136302948, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.21173663437366486, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.11890629678964615, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.11343604326248169, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.13243147730827332, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.12225236743688583, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.1205415204167366, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.1072317436337471, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.10193997621536255, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06729841977357864, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05834457650780678, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.056821178644895554, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.0564650297164917, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.03355138748884201, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.028850605711340904, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.02874370850622654, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.026416104286909103, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.02618119679391384, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.01727238856256008, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.016840828582644463, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.01671534962952137, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.010355744510889053, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.52.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.2246524542570114, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.19264771044254303, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.1821242719888687, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.15387842059135437, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.10400371253490448, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.0915476456284523, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.12069082260131836, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.11098679155111313, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.10772162675857544, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.08249198645353317, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.07455164194107056, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.061943165957927704, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05337635800242424, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.050175007432699203, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.049407102167606354, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.031001174822449684, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.026268212124705315, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.026063017547130585, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.022045325487852097, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.02151699922978878, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.016634952276945114, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.016837652772665024, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.015616526827216148, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.011764299124479294, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.52.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.20810095965862274, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.19578656554222107, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.19196292757987976, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.1747438907623291, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.09822160750627518, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.0937904417514801, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.1090276688337326, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.1008332148194313, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.09955939650535583, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.0886123850941658, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.08433137089014053, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.0556272529065609, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.0482725165784359, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.04708404466509819, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.04680417478084564, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.027773771435022354, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.024287711828947067, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.024209506809711456, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.022335369139909744, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.022159280255436897, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.014640090987086296, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.014742415398359299, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.014250028878450394, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.009886217303574085, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.52.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.24748371541500092, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.23279939591884613, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.2282993048429489, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.20779818296432495, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.11662882566452026, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.11141227930784225, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.12961649894714355, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.11970779299736023, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.11823022365570068, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.1052636057138443, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.10011366009712219, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.06606800854206085, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.05724620446562767, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.05584707111120224, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.05551770329475403, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.03297721967101097, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.028607146814465523, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.028513228520751, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.02627882920205593, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.026065168902277946, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.017355535179376602, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.017068132758140564, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.016899529844522476, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.011071563698351383, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.52.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.25026142597198486, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.22645722329616547, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.21697473526000977, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.19323532283306122, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.11525954306125641, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.10590042173862457, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.1345251351594925, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.12413807213306427, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.1186029464006424, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.10050778090953827, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.0944136530160904, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.06826014816761017, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.059325918555259705, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.05533602088689804, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.05436652898788452, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.03418409824371338, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.02867959439754486, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.02843654528260231, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.02573631890118122, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.02511216327548027, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.018102651461958885, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.0180573221296072, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.016822440549731255, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.01207856647670269, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.53.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.13683369755744934, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.12841013073921204, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.12531611323356628, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.11386203020811081, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.06430070847272873, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.061021577566862106, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.07239997386932373, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.06694946438074112, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.06521400809288025, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.057879239320755005, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.054984841495752335, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.03672875836491585, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.03197529911994934, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.030766848474740982, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.030485985800623894, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.0183419082313776, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.015706874430179596, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.015596214681863785, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.014380422420799732, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.014202595688402653, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.009517809376120567, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.009368451312184334, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.009116762317717075, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005912096705287695, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.53.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.1203402578830719, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.11291569471359253, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.1101241484284401, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.1000078022480011, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.056531231850385666, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.05361169949173927, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.06398528814315796, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.059008002281188965, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.05735069885849953, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.05091046541929245, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.04845857992768288, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.032517313957214355, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.028190521523356438, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.027061255648732185, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.026801547035574913, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.016231730580329895, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.013837921433150768, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.013732722960412502, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.012680898420512676, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.01250978372991085, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.008450877852737904, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.008296515792608261, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.008076267316937447, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005270558875054121, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.53.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.2546692192554474, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.2392565757036209, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.23435579240322113, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.21302077174186707, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.11981254816055298, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.11425267159938812, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.1335470825433731, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.12327192723751068, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.12148411571979523, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.10794064402580261, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.1025054082274437, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06782232224941254, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05879996716976166, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.0572456419467926, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.056878991425037384, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.03380409628152847, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.02905651554465294, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.02894018031656742, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.026586590334773064, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.026345131918787956, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.017382420599460602, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.016943765804171562, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.01681002601981163, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.010386436246335506, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.53.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.2010863572359085, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.16846494376659393, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.15401716530323029, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.13296997547149658, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.092524453997612, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.07739763706922531, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.1144503653049469, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.10503610223531723, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.09630607068538666, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.0729246437549591, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.06774337589740753, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.05883081629872322, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05048787221312523, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.044760704040527344, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.04332181438803673, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.029460402205586433, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.023636167868971825, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.023065581917762756, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.019837558269500732, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.018861308693885803, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.01584176905453205, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.015879664570093155, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.013979786075651646, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.011063274927437305, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.53.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.2100650668144226, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.19752034544944763, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.19366560876369476, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.17615658044815063, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.09913860261440277, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.09464968740940094, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.1102483868598938, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.10183297097682953, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.10051941871643066, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.08940839022397995, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.08509205281734467, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.05630553141236305, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.04878724366426468, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.04757848009467125, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.0472925528883934, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.02815547212958336, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.024586699903011322, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.024506162852048874, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.02262440323829651, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.02243974804878235, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.014992067590355873, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.015014927834272385, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.01459544338285923, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010172553360462189, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.53.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.24944481253623962, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.23459197580814362, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.22998052835464478, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.20936031639575958, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.11750412732362747, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.11220403760671616, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.13047122955322266, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.12065071612596512, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.11915336549282074, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.10597338527441025, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.10074432939291, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.06642257422208786, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.05763279274106026, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.05621752887964249, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.055881716310977936, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.03310469910502434, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.02865510620176792, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.028560101985931396, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.026267310604453087, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.026055339723825455, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.017231525853276253, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.0168894175440073, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.016766179352998734, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010655577294528484, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.53.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.25717633962631226, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.2329205870628357, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.2233603596687317, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.1988452672958374, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.11856444180011749, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.10910136252641678, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.13878893852233887, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.12733614444732666, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.1219363659620285, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.10336659848690033, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.0970437154173851, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.0702328309416771, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.06090095266699791, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.05696902424097061, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.056003253906965256, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.03523368388414383, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.029560307040810585, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.02933325804769993, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.026528289541602135, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.0259194765239954, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.01878838613629341, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.018636783584952354, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.017493100836873055, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.012567688710987568, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.54.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.14179062843322754, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.1329522728919983, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.12970204651355743, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.11772620677947998, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.06661064922809601, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.06317883729934692, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.0751260370016098, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.0693836510181427, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.0675797089934349, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.05988109111785889, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.056902699172496796, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.0381515696644783, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.033128634095191956, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.031870774924755096, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.03157995268702507, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.019029445946216583, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.01628037542104721, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.016168540343642235, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.014895581640303135, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.014707455411553383, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.00987329427152872, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.009722841903567314, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.009445099160075188, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.00614364817738533, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.54.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.12246770411729813, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.11484627425670624, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.1119144856929779, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.10157988220453262, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.057556990534067154, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.054518166929483414, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.06528141349554062, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.06014677509665489, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.058374255895614624, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.051769327372312546, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.04929580166935921, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.033185865730047226, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.028739219531416893, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.027555767446756363, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.02727489173412323, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.016572032123804092, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.014086736366152763, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.013973130844533443, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.012901335023343563, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.01271614246070385, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.008623699657619, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.008450486697256565, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.0082321185618639, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.0053597320802509785, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.54.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.2609327733516693, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.24494943022727966, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.23998574912548065, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.21786369383335114, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.12282514572143555, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.11700837314128876, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.13689234852790833, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.12630608677864075, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.12452854961156845, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.11046861857175827, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.10492193698883057, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06956116110086441, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.06029396876692772, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.0586785227060318, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.058294303715229034, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.03467249497771263, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.029774241149425507, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.029666990041732788, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.02722652070224285, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.026983924210071564, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.0178391020745039, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.01737828738987446, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.017252860590815544, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.010665243491530418, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.54.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.21164190769195557, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.17820966243743896, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.166513592004776, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.12987075746059418, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.09570688754320145, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.08297345042228699, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.11513170599937439, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.10560308396816254, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.10130056738853455, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.07372941076755524, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.06574316322803497, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.059095002710819244, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.050671711564064026, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.04613010585308075, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.044990696012973785, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.029590368270874023, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.02401360124349594, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.023718709126114845, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.019661754369735718, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.01886843889951706, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.01589103974401951, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.01548478752374649, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.014433097094297409, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.010400035418570042, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.54.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.2096579372882843, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.1970626413822174, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.19316484034061432, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.17569951713085175, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.09901722520589828, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.09447644650936127, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.10995651036500931, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.10166903585195541, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.10037286579608917, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.08921797573566437, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.08484572917222977, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.05615195631980896, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.0487159788608551, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.04750709980726242, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.04722118005156517, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.028049614280462265, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.02453780174255371, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.0244551170617342, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.02255626954138279, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.022377202287316322, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.014845523051917553, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.014959476888179779, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.014449404552578926, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010112377814948559, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.54.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.24883483350276947, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.23391054570674896, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.22930601239204407, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.208597332239151, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.1172860860824585, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.11195001751184464, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.13020910322666168, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.12040168792009354, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.11894509196281433, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.1056920513510704, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.10051855444908142, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.06634452939033508, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.057554200291633606, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.05612986534833908, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.0557953380048275, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.03308454155921936, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.028656207025051117, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.028564363718032837, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.026265084743499756, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.026055440306663513, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.017258772626519203, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.01695472002029419, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.01678640767931938, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.01078023761510849, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.54.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.2550402283668518, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.2303566038608551, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.22054681181907654, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.19622798264026642, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.11739999055862427, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.10777351260185242, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.13722769916057587, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.1264403760433197, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.1209249421954155, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.10218657553195953, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.09594159573316574, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.06985434144735336, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.06040418520569801, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.05632337927818298, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.055324576795101166, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.0351056233048439, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.029087603092193604, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.028842004016041756, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.02603510394692421, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.02539360150694847, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.01882198452949524, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.018183179199695587, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.017481939867138863, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.011973416432738304, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.55.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.13973356783390045, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.13108837604522705, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.12775248289108276, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.11592663079500198, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.06568162143230438, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.062247224152088165, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.07426606118679047, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.06857223063707352, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.06662428379058838, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.05901523306965828, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.05614081770181656, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.037699297070503235, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.03276412934064865, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.031446706503629684, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.03113195300102234, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.018834199756383896, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.016075022518634796, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.01595068722963333, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.014705508016049862, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.014504722319543362, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.00978654995560646, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.009639505296945572, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.009342126548290253, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.006118634715676308, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.55.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.12219735234975815, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.11455687880516052, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.11160759627819061, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.1012972742319107, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.05741457641124725, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.05439100041985512, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.06506207585334778, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.060022782534360886, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.05824138969182968, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.051611851900815964, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.049129363149404526, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.03306306153535843, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.028687678277492523, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.027487440034747124, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.02719966322183609, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.016503484919667244, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.01405187975615263, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.013937416486442089, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.012858893722295761, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.012679769657552242, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.008580727502703667, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.008430218324065208, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.008187072351574898, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005343878176063299, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.55.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.2566962242126465, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.24099421501159668, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.2359364628791809, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.21428906917572021, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.1207999512553215, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.11509670317173004, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.13458633422851562, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.12438200414180756, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.12251581996679306, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.10865607857704163, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.10315723717212677, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06832937151193619, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05937201529741287, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.05772474408149719, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.0573359839618206, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.03407996892929077, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.02927597612142563, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.029155705124139786, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.026756584644317627, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.026501452550292015, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.01750705949962139, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.017056839540600777, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.016916096210479736, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.010401932522654533, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.55.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.2051878720521927, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.17885026335716248, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.1698143482208252, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.14695680141448975, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.0946371853351593, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.08427854627370834, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.11140716075897217, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.10122566670179367, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.09776459634304047, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.07674329727888107, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.07209637016057968, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.057094037532806396, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.048995375633239746, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.046027202159166336, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.04529804736375809, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.02887990139424801, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.024713879451155663, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.024501550942659378, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.02131645940244198, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.020849451422691345, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.01629846729338169, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.016635311767458916, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.015423894859850407, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.012492959387600422, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.55.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.20867276191711426, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.19612020254135132, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.19216899573802948, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.1747652143239975, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.09855538606643677, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.09400300681591034, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.10954336822032928, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.10121835023164749, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.09991636127233505, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.08877851068973541, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.08439663052558899, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.055909860879182816, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.0485134981572628, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.04729820415377617, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.04701397940516472, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.02795145846903324, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.02449100650846958, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.024410421028733253, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.022523168474435806, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.022340942174196243, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.014828999526798725, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.015019661746919155, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.014439377002418041, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.01024805847555399, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.55.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.24774141609668732, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.23281702399253845, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.22817891836166382, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.20756039023399353, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.11681161820888519, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.1114884614944458, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.1299149990081787, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.11995632946491241, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.11846227198839188, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.10526692867279053, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.10008936375379562, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.06624040752649307, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.05738954618573189, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.05596936121582985, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.055631235241889954, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.03309254348278046, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.028683912009000778, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.02858685702085495, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.026311680674552917, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.026099015027284622, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.01746562495827675, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.017133450135588646, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.017001811414957047, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.01112869381904602, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.55.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.25677257776260376, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.23181413114070892, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.22161249816417694, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.196940615773201, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.11827784776687622, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.10829014331102371, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.13923580944538116, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.12799064815044403, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.12183933705091476, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.10283981263637543, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.096579410135746, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.07071290910243988, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.0612436980009079, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.05681021511554718, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.05572112649679184, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.035638049244880676, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.029434407129883766, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.029131723567843437, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.026347249746322632, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.02564011700451374, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.01922685094177723, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.018582943826913834, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.017820358276367188, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.012355206534266472, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.56.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.14179058372974396, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.1329125016927719, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.12947358191013336, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.11740504950284958, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.06661557406187057, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.06312599778175354, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.07529661059379578, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.06963150948286057, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.06761045753955841, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.05980914086103439, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.05676911398768425, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.038213640451431274, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.03325524181127548, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.03190129995346069, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.0315721370279789, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.019078683108091354, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.016314653679728508, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.016184629872441292, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.014916078187525272, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.014710898511111736, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.0099251763895154, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.00980130210518837, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.009483115747570992, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.006240107584744692, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.56.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.1211274191737175, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.1135212704539299, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.11042844504117966, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.10008801519870758, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.056883301585912704, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.053821172565221786, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.06480350345373154, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.05972844362258911, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.05772598832845688, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.051100440323352814, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.04863473400473595, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.03294195979833603, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.028552312403917313, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.02726418524980545, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.0269619207829237, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.016462069004774094, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.013977698050439358, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.013849589973688126, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.012795675545930862, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.012600255198776722, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.008595949970185757, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.008462360128760338, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.008173431269824505, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005441334098577499, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.56.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.25999176502227783, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.24399298429489136, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.23893140256404877, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.21675927937030792, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.1224021166563034, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.11659549921751022, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.1365402638912201, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.12604577839374542, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.12413930147886276, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.1100274920463562, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.10442937165498734, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06936044991016388, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.06014947593212128, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.058509085327386856, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.05809438228607178, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.03456251323223114, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.02968265302479267, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.029567016288638115, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.027108274400234222, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.02685886062681675, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.017767008394002914, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.017323847860097885, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.017156602814793587, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.010602939873933792, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.56.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.19060039520263672, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.1627977341413498, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.15228182077407837, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.13108420372009277, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.08687829971313477, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.07574151456356049, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.10413176566362381, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.09604430198669434, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.09105090796947479, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.0703134536743164, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.06492991745471954, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.053186241537332535, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.046008992940187454, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.041811998933553696, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.04078424721956253, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.026566237211227417, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.021784931421279907, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.021454554051160812, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.018501346930861473, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.01779690943658352, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.013988957740366459, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.01406420860439539, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.012626792304217815, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.009450927376747131, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.56.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.2090778946876526, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.19653043150901794, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.19264672696590424, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.17512661218643188, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.09874948114156723, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.0941961407661438, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.10983604937791824, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.10142159461975098, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.10011424124240875, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.08895845711231232, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.08457816392183304, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.05613616853952408, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.04864143207669258, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.04741679131984711, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.04712978005409241, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.02806391753256321, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.024549201130867004, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.024460559710860252, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.022568101063370705, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.022388311102986336, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.014947250485420227, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.015043312683701515, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.014552793465554714, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010244376957416534, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.56.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.2413303554058075, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.22680440545082092, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.22227568924427032, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.20216964185237885, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.11379052698612213, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.1085490733385086, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.1265486627817154, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.1168697252869606, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.11539282649755478, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.10251887887716293, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.09738633781671524, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.06451515853404999, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.055892378091812134, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.054497700184583664, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.0541691780090332, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.03221921622753143, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.02788769081234932, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.027794530615210533, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.02556791342794895, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.025355786085128784, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.01694115251302719, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.016598735004663467, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.016485532745718956, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010709690861403942, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.56.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.25439324975013733, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.2291623204946518, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.21896661818027496, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.19454921782016754, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.11702735722064972, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.1070735827088356, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.13751229643821716, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.12677617371082306, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.12071175873279572, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.10164307057857513, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.0953558161854744, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.06987830251455307, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.060575954616069794, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.05619322881102562, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.05511773005127907, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.034942876547575, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.029086096212267876, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.028817782178521156, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.026011411100625992, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.025322195142507553, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.01847526617348194, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.018328163772821426, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.017056960612535477, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.012158442288637161, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.57.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.13963118195533752, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.1307985782623291, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.1274147778749466, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.11551447957754135, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.06558041274547577, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.06208980828523636, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.0741666853427887, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.06860627979040146, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.06655155122280121, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.058875419199466705, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.05586516112089157, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.03767140209674835, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.03277593106031418, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.03140927106142044, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.031080076470971107, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.01879723370075226, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.016070080921053886, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.01594182662665844, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.014691561460494995, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.014480075798928738, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.009793595410883427, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.009670338593423367, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.009342638775706291, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.006166693288832903, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.57.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.11863648146390915, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.1112000122666359, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.10813316702842712, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.09805203974246979, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.05571264401078224, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.05270680785179138, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.06341967731714249, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.05851096659898758, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.056531865149736404, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.050046440213918686, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.04763566702604294, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.03221696615219116, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.027943657711148262, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.026695117354393005, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.026400746777653694, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.016097044572234154, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.013686136342585087, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.013560342602431774, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.012525618076324463, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.012334276922047138, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.008406511507928371, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.008283567614853382, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.007999386638402939, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005330508109182119, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.57.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.2593596875667572, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.24340146780014038, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.23824211955070496, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.21622571349143982, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.12213292717933655, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.11629661917686462, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.13621139526367188, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.1257639080286026, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.12384980916976929, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.10976838320493698, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.10421054810285568, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06924212723970413, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.06003693491220474, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.05836918205022812, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.05798332393169403, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.03451048955321312, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.02962915599346161, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.02950994297862053, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.02706163376569748, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.026802577078342438, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.017752990126609802, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.017304467037320137, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.017148664221167564, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.010601826943457127, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.57.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.18842466175556183, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.16253812611103058, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.1543489396572113, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.12716665863990784, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.08744874596595764, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.07748281955718994, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.10048114508390427, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.09233132749795914, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.0902140736579895, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.06859152764081955, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.061921995133161545, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.051491908729076385, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.04432320594787598, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.04210229590535164, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.04156537726521492, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.025738758966326714, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.021918658167123795, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.021786192432045937, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.01818019524216652, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.017811287194490433, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.01377183198928833, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.013810993172228336, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.013062222860753536, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.009505209513008595, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.57.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.20843033492565155, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.19580015540122986, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.1919003427028656, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.1744384467601776, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.09843858331441879, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.09390205889940262, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.10939151048660278, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.10115007311105728, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.09983595460653305, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.08865633606910706, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.08417381346225739, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.05577057600021362, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.048469893634319305, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.04724374786019325, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.04695121943950653, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.027860719710588455, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.02441154606640339, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.024323832243680954, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.022423552349209785, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.022237326949834824, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.014716227538883686, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.01489317324012518, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.014312103390693665, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010059994645416737, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.57.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.23576299846172333, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.22145144641399384, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.21703216433525085, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.19731996953487396, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.11119789630174637, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.10606798529624939, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.12352284789085388, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.1141822338104248, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.11274490505456924, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.10011342167854309, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.09512346237897873, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.06292014569044113, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.0545891709625721, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.053224578499794006, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.052898768335580826, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.03137825429439545, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.02717939019203186, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.027092203497886658, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.02490379475057125, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.02469312772154808, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.016369879245758057, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.01610492169857025, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.015916768461465836, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.01026933640241623, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.57.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.25590094923973083, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.22946308553218842, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.21887920796871185, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.1945178359746933, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.11744340509176254, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.10705172270536423, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.13820037245750427, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.1274460256099701, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.12145930528640747, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.10173484683036804, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.09545428305864334, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.07024982571601868, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.060880161821842194, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.056361593306064606, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.05526386573910713, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.03512635454535484, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.029117275029420853, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.028856156393885612, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.025961536914110184, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.025239691138267517, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.01852015033364296, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.01828760653734207, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.017051544040441513, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.011995399370789528, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.58.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.13518795371055603, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.12646789848804474, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.12291097640991211, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.11125357449054718, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.06341181695461273, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.059885915368795395, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.07211317867040634, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.06670789420604706, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.06438877433538437, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.056846026331186295, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.05394656956195831, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.03663983941078186, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.03186988830566406, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.03038523904979229, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.03002186119556427, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.01831101067364216, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.015573408454656601, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.015415918081998825, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.014221611432731152, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.013992402702569962, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.009552869014441967, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.009427347220480442, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.009072717279195786, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.00603907834738493, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.58.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.11196307837963104, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.10474197566509247, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.10161272436380386, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.09199332445859909, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.052492108196020126, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.04949207603931427, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.06009206175804138, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.055453259497880936, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.05326493829488754, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.04706418514251709, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.04474529251456261, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.03047875128686428, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.02648654580116272, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.02515067346394062, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.024824853986501694, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.015243330039083958, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.012905263341963291, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.012767990119755268, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.01180292759090662, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.011595413088798523, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.007974620908498764, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.007863948121666908, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.007547319401055574, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.00506970752030611, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.58.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.25613877177238464, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.24025507271289825, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.23509247601032257, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.21322093904018402, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.12063057720661163, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.11480412632226944, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.13488996028900146, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.12438498437404633, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.1223490983247757, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.108311228454113, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.10281539708375931, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06859313696622849, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05939747393131256, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.0576874241232872, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.05726748704910278, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.03419801592826843, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.029340658336877823, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.029204949736595154, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.026781784370541573, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.02652406319975853, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.017629500478506088, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.017227474600076675, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.016991840675473213, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.01068080123513937, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.58.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.1954953819513321, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.16577422618865967, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.155979722738266, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.13233979046344757, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.08959965407848358, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.07841236144304276, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.1052994653582573, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.09675072878599167, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.09379792213439941, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.07110200822353363, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.06450522691011429, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.054027725011110306, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.04654378071427345, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.04332760348916054, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.04255254939198494, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.027110528200864792, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.022906683385372162, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.0227176733314991, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.01932578720152378, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.018783530220389366, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.014634518884122372, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.01503953617066145, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.013604323379695415, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.010769162327051163, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.58.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.20150256156921387, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.18920302391052246, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.18534496426582336, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.16848021745681763, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.09517014026641846, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.09072022885084152, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.10594011098146439, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.09782328456640244, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.0964968353509903, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.08562148362398148, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.08135437220335007, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.05407485365867615, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.04689934849739075, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.0456898994743824, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.04540068283677101, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.027048977091908455, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.023650577291846275, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.02357122115790844, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.02173212729394436, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.021546250209212303, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.014383822679519653, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.014499788172543049, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.01398487202823162, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.009879815392196178, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.58.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.22394050657749176, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.2103058397769928, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.206030935049057, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.1872606873512268, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.10560598224401474, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.10068795084953308, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.11750434339046478, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.1084900051355362, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.10712418705224991, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.09505607932806015, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.09030681848526001, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.05992760509252548, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.05189339071512222, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.05057806894183159, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.0502653606235981, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.02989644557237625, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.025886086747050285, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.025798926129937172, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.023715293034911156, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.02351670153439045, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.015678057447075844, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.01541717629879713, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.015241937711834908, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.009935390204191208, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.58.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.24696676433086395, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.22110480070114136, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.21084216237068176, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.1869649440050125, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.11348485201597214, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.10333219170570374, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.13440589606761932, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.1231379508972168, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.11739396303892136, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.09803565591573715, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.0920204445719719, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.06834738701581955, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.05897656828165054, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.054606322199106216, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.05354634299874306, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.03426675871014595, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.02849939651787281, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.028248613700270653, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.02544754184782505, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.024760710075497627, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.018377112224698067, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.01827632263302803, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.016944987699389458, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.012504996731877327, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.59.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.12769737839698792, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.11938746273517609, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.11589689552783966, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.10477596521377563, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.059921517968177795, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.056498587131500244, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.06853381544351578, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.06319653987884521, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.060829803347587585, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.05365777388215065, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.05095646157860756, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.03486841917037964, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.030230319127440453, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.028740106150507927, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.02838069573044777, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.017434624955058098, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.014808954671025276, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.01466203574091196, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.013534890487790108, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.013309618458151817, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.009178860113024712, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.009100990369915962, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.008702371269464493, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.0059913028962910175, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.59.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.10883089154958725, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.10172326862812042, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.0985696017742157, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.08911582082509995, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.05101390182971954, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.04798559099435806, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.0587538443505764, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.054173775017261505, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.051776375621557236, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.04569122567772865, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.04341481253504753, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.029863018542528152, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.025904711335897446, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.024473801255226135, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.02412172220647335, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.014940979890525341, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.012606695294380188, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.012455672957003117, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.011527699418365955, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.011310843750834465, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.007874035276472569, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.007769064977765083, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.0074136946350336075, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.0050889733247458935, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.59.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.24581779539585114, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.2303195297718048, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.2251138687133789, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.20382605493068695, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.11572184413671494, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.10998887568712234, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.1296355426311493, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.11956038326025009, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.11748870462179184, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.10373294353485107, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.09833256900310516, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.0659155547618866, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05708844214677811, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.055333562195301056, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.054917916655540466, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.032870933413505554, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.028168385848402977, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.028036214411258698, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.025680439546704292, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.02541416697204113, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.016974132508039474, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.016592444851994514, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.016347244381904602, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.010313053615391254, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.59.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.1362396627664566, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.11795931309461594, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.11229746043682098, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.09542408585548401, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.06138657033443451, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.055073902010917664, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.07317273318767548, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.06662559509277344, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.064840167760849, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.05106034874916077, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.0467098206281662, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.03749576956033707, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.032809194177389145, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.03043687902390957, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.029872803017497063, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.019105641171336174, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.017292948439717293, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.01718553714454174, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.015479369089007378, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.015142698772251606, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.011068915948271751, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.012710539624094963, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.010382414795458317, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.010483263991773129, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.59.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.1923101842403412, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.18061593174934387, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.1769109070301056, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.16071100533008575, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.09083200991153717, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.08657776564359665, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.10107548534870148, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.09339633584022522, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.09212177246809006, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.08172362297773361, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.07763069868087769, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.0516541562974453, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.044828347861766815, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.043656300753355026, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.0433749295771122, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.025841202586889267, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.02268858253955841, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.02260753884911537, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.020859580487012863, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.02068442478775978, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.013772806152701378, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.01404036395251751, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.0133843794465065, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.009710945188999176, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.59.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.20799243450164795, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.19530531764030457, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.19133305549621582, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.17386941611766815, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.09812941402196884, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.09354537725448608, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.10921302437782288, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.10090671479701996, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.09952393919229507, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.0882842093706131, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.08385223895311356, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.05572337657213211, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.04830130189657211, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.04704918712377548, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.04674875736236572, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.027833839878439903, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.02419321797788143, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.024107549339532852, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.02218562364578247, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.021994438022375107, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.014677968807518482, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.0145809231325984, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.014260509051382542, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.009625263512134552, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.59.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.23715481162071228, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.21115966141223907, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.20101380348205566, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.1782117784023285, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.1089201271533966, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.09876470267772675, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.12931856513023376, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.1183670163154602, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.1129022017121315, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.09372396022081375, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.08798126876354218, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.06605667620897293, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.056898970156908035, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.05262962728738785, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.05158538743853569, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.033270131796598434, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.027833009138703346, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.027610156685113907, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.02488502860069275, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.024228617548942566, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.018203698098659515, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.018325038254261017, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.016863718628883362, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.013117951340973377, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.60.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.1251935511827469, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.11717526614665985, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.11369466781616211, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.1029023677110672, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.05877318233251572, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.055426936596632004, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.06748805195093155, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.06211881339550018, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.05966322869062424, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.05269483104348183, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.05019659176468849, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.034315817058086395, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.02970406971871853, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.028203612193465233, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.027842599898576736, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.01718893088400364, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.014537577517330647, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.014380767941474915, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.01330976840108633, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.013080134056508541, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.009076874703168869, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.008947931230068207, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.008608062751591206, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005890370812267065, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.60.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.10446150600910187, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.09776581823825836, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.094619020819664, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.08559008687734604, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.04895343631505966, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.04604966193437576, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.05652816221117973, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.0521494597196579, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.04967271536588669, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.043906766921281815, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.04174809902906418, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.028715552762150764, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.02493315003812313, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.023496728390455246, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.023147549480199814, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.014379849657416344, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.012113651260733604, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.011958539485931396, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.01108919270336628, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.01086962316185236, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.007591753266751766, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.007498939987272024, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.007136872038245201, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.004920239094644785, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.60.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.2500193119049072, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.2342459261417389, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.22917728126049042, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.20754843950271606, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.11773552000522614, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.11199157685041428, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.13172085583209991, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.12144660949707031, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.11945541948080063, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.10558217018842697, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.10015235096216202, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06698844581842422, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05800285562872887, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.056309595704078674, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.055905647575855255, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.03340074047446251, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.028666015714406967, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.02853880636394024, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.02614763006567955, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.025891771540045738, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.01722528412938118, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.016884515061974525, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.01660367287695408, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.010528544895350933, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.60.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.14913474023342133, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.12504343688488007, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.11798910796642303, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.09661532938480377, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.0682653933763504, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.05952217057347298, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.07937568426132202, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.07228939235210419, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.07127893716096878, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.05251958593726158, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.04834426939487457, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.04096659645438194, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.035426996648311615, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.0336330384016037, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.03319593518972397, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.020958900451660156, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.01873338408768177, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.01867588609457016, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.015938539057970047, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.015666231513023376, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.01222635991871357, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.013308290392160416, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.01169832143932581, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.010786234401166439, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.60.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.1574241816997528, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.14779753983020782, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.14477859437465668, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.1315731406211853, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.0747177004814148, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.0712348148226738, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.0832122191786766, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.07682600617408752, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.07575007528066635, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.06728700548410416, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.06403752416372299, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.04295680671930313, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.037563346326351166, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.036620065569877625, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.03640787675976753, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.02168831042945385, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.02022925205528736, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.02017267979681492, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.018869493156671524, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.018742064014077187, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.012355601415038109, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.014061597175896168, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.012076891027390957, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.011375296860933304, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.60.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.16394363343715668, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.15392008423805237, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.15079376101493835, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.13704600930213928, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.07779833674430847, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.07419147342443466, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.08668544143438339, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.07998373359441757, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.07886926084756851, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.07010986655950546, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.06672516465187073, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.04474318027496338, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.039117131382226944, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.038140635937452316, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.03791198879480362, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.022625500336289406, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.021070636808872223, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.021012600511312485, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.01965402252972126, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.019521648064255714, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.012948649935424328, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.01464544702321291, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.012646341696381569, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.011849582195281982, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.60.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.19636796414852142, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.17280711233615875, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.16369891166687012, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.1448846310377121, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.08979228883981705, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.08083169162273407, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.10930848866701126, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.09837448596954346, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.09346122294664383, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.07695597410202026, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.07268266379833221, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.05603222921490669, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.04791710153222084, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.0440073236823082, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.043036699295043945, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.028676889836788177, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.024254288524389267, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.024055689573287964, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.021909283474087715, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.02132471092045307, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.016523608937859535, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.017165429890155792, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.015323334373533726, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.01346854493021965, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.61.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.11687780171632767, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.10937128216028214, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.10598021745681763, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.09574412554502487, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.05481228604912758, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.05158407613635063, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.06354838609695435, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.05817171931266785, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.05561641603708267, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.049051299691200256, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.04674642160534859, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.03233105689287186, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.0278236772865057, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.02629665657877922, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.025926334783434868, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.016181260347366333, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.013562997803092003, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.01340174674987793, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.012396854348480701, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.012163055129349232, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.008535408414900303, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.008383489213883877, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.00804342795163393, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005527683068066835, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.61.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.09189195185899734, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.0859808549284935, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.0830690786242485, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.07513351738452911, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.04303056746721268, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.04039745405316353, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.050128474831581116, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.04606124013662338, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.043678224086761475, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.038583457469940186, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.03673966974020004, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.02549077942967415, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.022034157067537308, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.020661763846874237, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.020329037681221962, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.012777102179825306, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.010703095234930515, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.01055261678993702, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.009806477464735508, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.009594625793397427, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.006794274784624577, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.006709694862365723, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.006356905680149794, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.004483919125050306, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.61.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.23835831880569458, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.22333383560180664, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.2182961255311966, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.19751711189746857, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.11245152354240417, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.10673566162586212, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.12759773433208466, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.1162281483411789, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.11405202746391296, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.10076254606246948, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.09604623168706894, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06525702774524689, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05586104467511177, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.0540352389216423, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.05356872081756592, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.03295016288757324, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.027761461213231087, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.027606790885329247, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.025351589545607567, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.025055745616555214, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.017276519909501076, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.01675802655518055, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.01660117134451866, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.010904253460466862, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.61.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.1320691704750061, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.10096599906682968, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.08987879008054733, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.0790473222732544, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.058652009814977646, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.04464057460427284, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.07882071286439896, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.06591609120368958, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.06282103061676025, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.04425383731722832, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.04330362379550934, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.040427401661872864, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.033961016684770584, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.030844977125525475, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.030089912936091423, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.022350644692778587, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.01973007246851921, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.01957331970334053, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.01761549524962902, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.01720968633890152, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.015112549066543579, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.01637008786201477, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.014299368485808372, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.014734365046024323, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.61.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.14921823143959045, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.13987669348716736, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.1368974894285202, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.12414845824241638, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.07029403746128082, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.06691017746925354, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.07851547002792358, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.07236609607934952, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.07132164388895035, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.0631038248538971, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.05991349741816521, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.0400356687605381, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.034684497863054276, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.03374059498310089, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.033514801412820816, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.020015329122543335, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.01747717335820198, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.017413608729839325, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.016030505299568176, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.01589411124587059, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.01059657335281372, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.010745457373559475, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.0102810924872756, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.007346519269049168, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.61.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.13936953246593475, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.1305927187204361, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.1278051733970642, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.11599995940923691, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.0660671517252922, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.06289011985063553, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.07391827553510666, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.0680425688624382, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.06701920181512833, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.059371985495090485, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.05644092708826065, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.03810681775212288, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.03308641538023949, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.032190289348363876, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.03198255971074104, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.01921514980494976, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.017471488565206528, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.017419544979929924, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.016212450340390205, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.01609043963253498, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.010803448967635632, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.011800462380051613, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.010523085482418537, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.009245488792657852, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.61.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.08619017899036407, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.07499109208583832, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.06977394968271255, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.06177788972854614, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.03929620236158371, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.03461000695824623, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.05075356364250183, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.04478882625699043, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.0411902517080307, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.03363103047013283, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.032053813338279724, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.026045745238661766, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.022170795127749443, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.01977839507162571, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.019171390682458878, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.0135518629103899, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.011632626876235008, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.011471208184957504, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.010688120499253273, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.0103446114808321, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.008140778169035912, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.00903370976448059, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.007388315163552761, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.007631157990545034, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + } + ], + "last_module_idx": 126, + "base_perplexity": 6.844775008040753 +} \ No newline at end of file