diff --git "a/measurement.json" "b/measurement.json" new file mode 100644--- /dev/null +++ "b/measurement.json" @@ -0,0 +1,138606 @@ +{ + "measurement": [ + { + "key": "model.layers.0.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.01703064516186714, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.016871917992830276, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.008001403883099556, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.007968862541019917, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.007932210341095924, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.004898960702121258, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.02462795563042164, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.0167556069791317, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.007950452156364918, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.007909832522273064, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.008452445268630981, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.009546810761094093, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.007900412194430828, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.005558614153414965, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.004879991058260202, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.005993719212710857, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.004872886463999748, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.004661934915930033, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.0048715583980083466, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.004660232458263636, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.0052037788555026054, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.0048711043782532215, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.004952073097229004, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.0046593202278018, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.0.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.01769017055630684, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.017446134239435196, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.008559944108128548, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.008501654490828514, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.00843970850110054, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.0041082995012402534, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.021969379857182503, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.01728539727628231, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.008475810289382935, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.008396207354962826, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.008608238771557808, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.00923270545899868, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.008378745056688786, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.005172024946659803, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.004061111249029636, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.005149032920598984, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.004041442181915045, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.003627776401117444, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.004038018640130758, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.003622581483796239, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.0037648186553269625, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.0040365541353821754, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.0032234331592917442, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.0036199381574988365, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.0.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.05914674326777458, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.049192577600479126, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.038992296904325485, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.033033810555934906, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.026258226484060287, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.01888892985880375, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.04896702617406845, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.03766615316271782, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.027995849028229713, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.021493569016456604, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.02210809476673603, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.02561993896961212, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.018228154629468918, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.013063322752714157, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.011501466855406761, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.012851133942604065, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.00736294686794281, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.006655475124716759, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.006534069310873747, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.005597748793661594, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.006664694752544165, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.006016298662871122, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.003939866088330746, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.004398501478135586, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.0.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.019812457263469696, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.013230650685727596, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.009637868031859398, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.008793679066002369, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.00804995559155941, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.005358180496841669, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.012520660646259785, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.010914275422692299, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.00903914961963892, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.005779339466243982, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.006002173759043217, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.0062923738732934, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.0054542492143809795, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.004276023246347904, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.003953662235289812, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.003393246792256832, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.002925744280219078, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.0028340413700789213, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.002662592101842165, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.0025089785922318697, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.002229386940598488, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.00263057928532362, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.0019381035817787051, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.0023699025623500347, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.0.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.0667378306388855, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.06270226836204529, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.06137606501579285, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.05587923899292946, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.029903123155236244, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.02866811864078045, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.03321341052651405, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.03077983297407627, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.030259190127253532, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.02732865884900093, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.026093173772096634, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.016866201534867287, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.014829428866505623, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.014456341043114662, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.014365770854055882, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.008509611710906029, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.007915896363556385, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.007890736684203148, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.007459037937223911, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.007407423108816147, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.004794086795300245, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.005434543825685978, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.004672669339925051, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.004340998362749815, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.0.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.09846413135528564, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.09259997308254242, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.0907411128282547, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.08266901969909668, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.04408002272248268, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.04231494292616844, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.04892553761601448, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.045272503048181534, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.044626884162425995, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.04027501866221428, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.038460873067379, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.024688009172677994, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.02147577702999115, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.020972829312086105, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.02085729129612446, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.012328688986599445, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.010921942070126534, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.010885031893849373, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.010193773545324802, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.010120180435478687, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.006583097390830517, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.006796928122639656, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.00640773493796587, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.004776574671268463, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.0.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1762188946759258, + "total_bits": 154030336.0, + "err": 0.06015477329492569, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.04993503540754318, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.033935628831386566, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7294596354166667, + "total_bits": 193188096.0, + "err": 0.03094068169593811, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.025315141305327415, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7294596354166667, + "total_bits": 263966976.0, + "err": 0.016751009970903397, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.04623857140541077, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.04210292175412178, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1762188946759258, + "total_bits": 224809216.0, + "err": 0.027048060670495033, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.525755931712963, + "total_bits": 249549056.0, + "err": 0.02176564931869507, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.02202645130455494, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.02146664820611477, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.019507139921188354, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.013974974863231182, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.012008149176836014, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.012028724886476994, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.009392043575644493, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3289966724537035, + "total_bits": 377180416.0, + "err": 0.008358604274690151, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525755931712963, + "total_bits": 391106816.0, + "err": 0.008938092738389969, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.731774450231481, + "total_bits": 405688576.0, + "err": 0.0077541060745716095, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.008588718250393867, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.00867276731878519, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.007166064344346523, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.007183261215686798, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.1.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.015297109261155128, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.012177308090031147, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.009404495358467102, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.008255986496806145, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.00658127898350358, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.004445814527571201, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.011844627559185028, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.009450945071876049, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.00713415676727891, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.005399566143751144, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.005729301832616329, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.006282583344727755, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.004673189949244261, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.0032910164445638657, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.0028480160981416702, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.003240451216697693, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.0018677806947380304, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.0016685331938788295, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.001667827251367271, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.0014016063651069999, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.001740102656185627, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.0015544185880571604, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.0010987541172653437, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.001129292999394238, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.1.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.014351644553244114, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.011195494793355465, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.008681508712470531, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.007604974787682295, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.006017720326781273, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.004139728844165802, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.00974363461136818, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.008585063740611076, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.006601280067116022, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.004824838135391474, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.00491407560184598, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.004948823247104883, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.004113276023417711, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.0029747935477644205, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.0026401253417134285, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.0024909425992518663, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.001671251724474132, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.0015296783531084657, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.0014658595900982618, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.001258642179891467, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.0013464237563312054, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.0013528994750231504, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.000946429034229368, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.000986910192295909, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.1.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.0781097561120987, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.06593295931816101, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.060703445225954056, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.05221259966492653, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.034952592104673386, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.029865529388189316, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.043330226093530655, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.03978097438812256, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.03671231493353844, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.028257623314857483, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.02644624374806881, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.0221213661134243, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.019111357629299164, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.016882702708244324, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.01632368192076683, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.01110146939754486, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.008924904279410839, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.008722372353076935, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.007637456059455872, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.007267136592417955, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.005865219049155712, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.006015868857502937, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.005098788999021053, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.004203918389976025, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.1.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.1400069296360016, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.08921924233436584, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.06491383165121078, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.06109970062971115, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.059783000499010086, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.037357524037361145, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.08338122069835663, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.07459625601768494, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.06723549962043762, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.03814107924699783, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.04034438729286194, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.04323119297623634, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.03696749359369278, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.030378106981515884, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.028650730848312378, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.022276848554611206, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.018194830045104027, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.017749974504113197, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.01489193830639124, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.01381638552993536, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.013100716285407543, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.014742149971425533, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.011187936179339886, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.012336342595517635, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.1.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.10237709432840347, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.09730079770088196, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.0957641750574112, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.08775953203439713, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.045514076948165894, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.04409359395503998, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.05020798742771149, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.04635791480541229, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.04592353105545044, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.042158447206020355, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.04057018458843231, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.025453416630625725, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.022145170718431473, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.021783316507935524, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.021695470437407494, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.012759375385940075, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.011674266308546066, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.01165183074772358, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.011072810739278793, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.011023957282304764, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.007095684297382832, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.007696755230426788, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.0069821542128920555, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.005919117014855146, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.1.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.12952379882335663, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.12344343215227127, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.12163671106100082, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.11190807074308395, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.0578293614089489, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.056102585047483444, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.06367047131061554, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.058848243206739426, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.058329641819000244, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.05369957908987999, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.051633547991514206, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.03200290724635124, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.027767715975642204, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.02733493410050869, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.02723667584359646, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.01591869629919529, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.013923471793532372, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.013895521871745586, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.013118569739162922, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.013055057264864445, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.008287563920021057, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.008197899907827377, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.008140865713357925, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.005256240256130695, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.1.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1762188946759258, + "total_bits": 154030336.0, + "err": 0.10747578740119934, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.09665163606405258, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.08805419504642487, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7294596354166667, + "total_bits": 193188096.0, + "err": 0.07991590350866318, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.048343807458877563, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7294596354166667, + "total_bits": 263966976.0, + "err": 0.04171436280012131, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.0646364763379097, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.05883430317044258, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1762188946759258, + "total_bits": 224809216.0, + "err": 0.049759987741708755, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.525755931712963, + "total_bits": 249549056.0, + "err": 0.04315577447414398, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.041824571788311005, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.033288683742284775, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.029137760400772095, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.024425728246569633, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.02254236862063408, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.01807583123445511, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.013804626651108265, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3289966724537035, + "total_bits": 377180416.0, + "err": 0.012567272409796715, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525755931712963, + "total_bits": 391106816.0, + "err": 0.01294221542775631, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.731774450231481, + "total_bits": 405688576.0, + "err": 0.011528490111231804, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.011101420037448406, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.010818185284733772, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.008828088641166687, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.007781564723700285, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.2.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.03491002693772316, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.03167295455932617, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.03043297491967678, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.027233747765421867, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.015544683672487736, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.014431249350309372, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.018695002421736717, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.016743233427405357, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.016114935278892517, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.013786221854388714, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.013105756603181362, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.009538935497403145, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.008026309311389923, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.007462743669748306, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.007318821735680103, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.004822685848921537, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.003921436611562967, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.003883073339238763, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.0035846810787916183, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.0034944703802466393, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.002599662635475397, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.002555561251938343, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.0023395316675305367, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.0018004211597144604, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.2.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.027231939136981964, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.024936385452747345, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.023972829803824425, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.021404748782515526, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.01222875714302063, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.011363607831299305, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.01439374778419733, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.013142249546945095, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.012499646283686161, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.010815526358783245, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.010233822278678417, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.007256454788148403, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.0062560406513512135, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.005851226858794689, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.005752613767981529, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.0036347389686852694, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.003050363389775157, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.003010509302839637, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.002774470718577504, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.0027127214707434177, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.001934744999743998, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.0019446881487965584, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.0017900329548865557, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.0013367384672164917, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.2.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.11468714475631714, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.10697409510612488, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.10450346022844315, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.09447052329778671, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.052537400275468826, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.049906667321920395, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.05875959247350693, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.05430690199136734, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.0533435195684433, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.04717814549803734, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.04470021650195122, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.029729902744293213, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.025829454883933067, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.02503463625907898, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.024841560050845146, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.014828979969024658, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.012739704921841621, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.012679625302553177, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.01165094505995512, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.011530768126249313, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.0076529718935489655, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.007524123881012201, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.007361258380115032, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.004713654518127441, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.2.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.12360271066427231, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.10525275021791458, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.09896928071975708, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.08096836507320404, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.056637175381183624, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.049984160810709, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.0669359415769577, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.060872260481119156, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.05854153260588646, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.04427364841103554, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.03924248367547989, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.034490760415792465, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.029725845903158188, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.02784089185297489, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.027374235913157463, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.017626410350203514, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.015526462346315384, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.015390202403068542, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.0133420554921031, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.013061408884823322, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.010231923311948776, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.01111290417611599, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.009685919620096684, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.008973972871899605, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.2.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.1316085308790207, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.1252697855234146, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.12344478070735931, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.11374091356992722, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.060102690011262894, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.05818958207964897, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.0663641020655632, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.06119060516357422, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.060671668499708176, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.05564781278371811, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.05373680219054222, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.03374408185482025, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.02920455113053322, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.028732366859912872, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.02862444892525673, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.0169164277613163, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.015052913688123226, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.015019960701465607, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.014197412878274918, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.014127827249467373, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.009213012643158436, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.009450430050492287, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.009061350487172604, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.006797719281166792, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.2.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.17111873626708984, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.16314159333705902, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.16076835989952087, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.14830732345581055, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.07835428416728973, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.07589355111122131, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.08645372837781906, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.07975320518016815, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.07906030863523483, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.07256424427032471, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.07001785933971405, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.04372675344347954, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.037859998643398285, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.03724941238760948, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.037100620567798615, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.021845221519470215, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.019041171297430992, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.019002238288521767, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.01788424141705036, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.01780306175351143, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.011529208160936832, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.011265824548900127, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.01133771613240242, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.007304472383111715, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.2.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1762188946759258, + "total_bits": 154030336.0, + "err": 0.1622205674648285, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.14743371307849884, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.14161062240600586, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7294596354166667, + "total_bits": 193188096.0, + "err": 0.12853112816810608, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.07324810326099396, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7294596354166667, + "total_bits": 263966976.0, + "err": 0.06773073971271515, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.08538129180669785, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.0786665603518486, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1762188946759258, + "total_bits": 224809216.0, + "err": 0.07530329376459122, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.525755931712963, + "total_bits": 249549056.0, + "err": 0.06501694023609161, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.06228053569793701, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.04332806542515755, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.03752722591161728, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.0351007804274559, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.03451639413833618, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.02170174941420555, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.018357308581471443, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3289966724537035, + "total_bits": 377180416.0, + "err": 0.018147684633731842, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525755931712963, + "total_bits": 391106816.0, + "err": 0.01675749570131302, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.731774450231481, + "total_bits": 405688576.0, + "err": 0.016371583566069603, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.011647677980363369, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.011774679645895958, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.01085997186601162, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.00818010326474905, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.3.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.041474975645542145, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.038439370691776276, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.03693071007728577, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.033190205693244934, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.01902342401444912, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.017680741846561432, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.02482028119266033, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.020550666376948357, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.019354678690433502, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.016958443447947502, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.016282157972455025, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.012688507325947285, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.009896215051412582, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.009148521348834038, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.008957943879067898, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.0063998824916779995, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.004797196947038174, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.004707360174506903, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.004392907023429871, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.00427559670060873, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.003397349501028657, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.003112941747531295, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.0028771068900823593, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.0021650029812008142, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.3.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.03332053869962692, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.03084319829940796, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.029635032638907433, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.02661111019551754, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.015245595946907997, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.014199711382389069, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.018020546063780785, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.016464870423078537, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.015525082126259804, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.013560193590819836, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.012954773381352425, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.009136133827269077, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.007865252904593945, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.007330961525440216, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.007202922832220793, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.004577811807394028, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.0038551129400730133, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.003798124613240361, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.0035280876327306032, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.0034477263689041138, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.002445456339046359, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.0025030598044395447, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.0022635089699178934, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.0017685353523120284, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.3.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.1605890840291977, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.15021762251853943, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.14685063064098358, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.13283830881118774, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.07451819628477097, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.07079146802425385, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.08401405066251755, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.07702448964118958, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.0756354033946991, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.06690420210361481, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.06364401429891586, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.042613860219717026, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.03672479838132858, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.035588283091783524, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.035307127982378006, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.021287089213728905, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.018121736124157906, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.0180329792201519, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.016558198258280754, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.01638360694050789, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.011029227636754513, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.010702211409807205, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.010587693192064762, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.006696055177599192, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.3.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.1280321627855301, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.10483646392822266, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.0909518450498581, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.07390256971120834, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.05774457007646561, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.04600394144654274, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.08525008708238602, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.07217221707105637, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.06070147827267647, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.04527048021554947, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.04314819350838661, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.044245388358831406, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.036518681794404984, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.03025432489812374, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.028529826551675797, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.02428808994591236, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.01926499977707863, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.018608782440423965, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.01743292808532715, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.016532571986317635, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.016002755612134933, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.01634792424738407, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.014103795401751995, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.014328167773783207, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.3.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.1786346733570099, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.16957339644432068, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.16680680215358734, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.1531972438097, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.0833451971411705, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.08038210868835449, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.09211558103561401, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.08502412587404251, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.08422905206680298, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.07658575475215912, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.07365857064723969, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.04719787463545799, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.041039206087589264, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.04029072821140289, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.04011557251214981, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.023638712242245674, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.02162143588066101, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.021566245704889297, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.02035151608288288, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.020239796489477158, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.01310315914452076, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.014274449087679386, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.012871398590505123, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010965579189360142, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.3.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.20647548139095306, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.1960640251636505, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.1929672807455063, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.17717748880386353, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.09606187045574188, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.09262607991695404, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.1061004027724266, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.09796685725450516, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.09704895317554474, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.08822169154882431, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.08480612188577652, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.05402085930109024, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.046784866601228714, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.045933302491903305, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.045741625130176544, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.026957828551530838, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.023762447759509087, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.02370663918554783, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.02220958285033703, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.0220746211707592, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.014393022283911705, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.01450846903026104, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.014112662523984909, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.009920218028128147, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.3.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1762188946759258, + "total_bits": 154030336.0, + "err": 0.04597829654812813, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.042386990040540695, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.02595404163002968, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7294596354166667, + "total_bits": 193188096.0, + "err": 0.02375846542418003, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.01914088986814022, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7294596354166667, + "total_bits": 263966976.0, + "err": 0.010592995211482048, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.03927634283900261, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.03680573031306267, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1762188946759258, + "total_bits": 224809216.0, + "err": 0.019505225121974945, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.525755931712963, + "total_bits": 249549056.0, + "err": 0.017828797921538353, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.01760942116379738, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.016737934201955795, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.015969378873705864, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.0057154688984155655, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.006126147694885731, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.0038117149379104376, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.0040710316970944405, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3289966724537035, + "total_bits": 377180416.0, + "err": 0.003124223556369543, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525755931712963, + "total_bits": 391106816.0, + "err": 0.003789686132222414, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.731774450231481, + "total_bits": 405688576.0, + "err": 0.0027453049551695585, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.0033355457708239555, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.0033869119361042976, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.0020100967958569527, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.0018094051629304886, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.4.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.05934372916817665, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.054551925510168076, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.05261603742837906, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.047182533890008926, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.02709214761853218, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.02527359127998352, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.03131216764450073, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.02885541506111622, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.0276480745524168, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.023963134735822678, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.02270752750337124, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.01585068367421627, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.013753675855696201, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.01297498308122158, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.012784094549715519, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.007924646139144897, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.0067182183265686035, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.006641285493969917, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.006096059922128916, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.005976834334433079, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.00415010517463088, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.0042009735479950905, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.0038876854814589024, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.0028244666755199432, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.4.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.05134306102991104, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.04717943072319031, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.04543443024158478, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.04073372483253479, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.023435650393366814, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.021810799837112427, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.027333257719874382, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.02506696619093418, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.02391582541167736, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.020724069327116013, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.019696908071637154, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.013838548213243484, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.011945480480790138, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.011221138760447502, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.011046446859836578, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.0069190277718007565, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.005800805985927582, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.005728795658797026, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.005261966492980719, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.005151113960891962, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.0036174815613776445, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.0036210452672094107, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.0033740417566150427, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.0024085193872451782, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.4.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.16752910614013672, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.1561085283756256, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.1524302214384079, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.1377386897802353, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.07769009470939636, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.07368597388267517, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.08719614148139954, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.08045505732297897, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.07897781580686569, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.06953682750463486, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.06586192548274994, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.04422716796398163, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.03834356367588043, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.03710619732737541, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.036811597645282745, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.02205909974873066, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.01887960359454155, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.01879125088453293, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.01719912327826023, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.01701200008392334, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.011368879117071629, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.011142974719405174, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.010911841876804829, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.006943785585463047, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.4.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.1656087189912796, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.14766304194927216, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.14084525406360626, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.12237315624952316, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.07754281163215637, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.07036252319812775, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.0911678597331047, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.083323173224926, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.07975700497627258, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.06505420058965683, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.060344595462083817, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.04731057211756706, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.04068077355623245, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.038014303892850876, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.037363506853580475, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.023925015702843666, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.020840134471654892, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.02061653509736061, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.018523113802075386, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.018128085881471634, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.013466836884617805, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.014557886868715286, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.01264302060008049, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.011427861638367176, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.4.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.16890433430671692, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.1596900373697281, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.1569811999797821, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.14366286993026733, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.07860510796308517, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.07554487884044647, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.08673294633626938, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.08028481900691986, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.07953805476427078, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.07174373418092728, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.06852272897958755, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.04404117166996002, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.038320206105709076, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.037568189203739166, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.037395115941762924, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.021992690861225128, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.019347697496414185, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.019297216087579727, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.01796240732073784, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.017853282392024994, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.011549989692866802, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.011674335226416588, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.01130021084100008, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.007811521179974079, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.4.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.21279272437095642, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.20132675766944885, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.19793148338794708, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.18119287490844727, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.09909728914499283, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.09525415301322937, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.10925726592540741, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.10117679089307785, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.10025244206190109, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.09043177217245102, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.08640331029891968, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.05539674311876297, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.048187896609306335, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.047251518815755844, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.04703009873628616, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.027628101408481598, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.024018263444304466, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.02395956963300705, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.02224169299006462, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.0221025962382555, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.01429009810090065, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.01401489693671465, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.013979125767946243, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.00875266082584858, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.4.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1762188946759258, + "total_bits": 154030336.0, + "err": 0.20734062790870667, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.18864187598228455, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.18189461529254913, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7294596354166667, + "total_bits": 193188096.0, + "err": 0.16264866292476654, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.09504532814025879, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7294596354166667, + "total_bits": 263966976.0, + "err": 0.0882074162364006, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.10904906690120697, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.10062247514724731, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1762188946759258, + "total_bits": 224809216.0, + "err": 0.09737566113471985, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.525755931712963, + "total_bits": 249549056.0, + "err": 0.08323937654495239, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.07856496423482895, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.05542724207043648, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.04807940497994423, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.045570243149995804, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.04497896507382393, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.02772478573024273, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.02369658835232258, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3289966724537035, + "total_bits": 377180416.0, + "err": 0.023496560752391815, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525755931712963, + "total_bits": 391106816.0, + "err": 0.021373165771365166, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.731774450231481, + "total_bits": 405688576.0, + "err": 0.020972412079572678, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.014747667126357555, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.014904092997312546, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.013945287093520164, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.010216845199465752, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.5.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.06502421200275421, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.059741418808698654, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.057451728731393814, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.051451072096824646, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.02978481538593769, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.027683474123477936, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.03455105051398277, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.031940609216690063, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.03040178306400776, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.026282135397195816, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.024827679619193077, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.017500193789601326, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.0152524309232831, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.014296799898147583, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.01406787522137165, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.008764456957578659, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.007461813744157553, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.007366627920418978, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.006770740263164043, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.006626038812100887, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.004621733911335468, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.004753540735691786, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.004305114503949881, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.0032760347239673138, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.5.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.055518344044685364, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.050983719527721405, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.04892081767320633, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.04379843920469284, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.02544127218425274, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.023592239245772362, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.029925355687737465, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.027464551851153374, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.02597547508776188, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.02246222458779812, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.02133815735578537, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.015171334147453308, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.0131094790995121, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.012211908586323261, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.011997482739388943, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.0075932396575808525, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.006349791772663593, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.006259050685912371, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.005759134888648987, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.005623320117592812, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.00399277126416564, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.004034085664898157, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.0036928970366716385, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.002738445531576872, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.5.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.18159280717372894, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.1689242273569107, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.16477033495903015, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.14864440262317657, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.08447340875864029, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.07995165139436722, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.09514519572257996, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.08763550221920013, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.08588375896215439, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.07536423206329346, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.07125728577375412, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.048318881541490555, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.04181186109781265, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.04037616774439812, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.04003620520234108, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.024104896932840347, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.02057240530848503, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.020465368404984474, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.01869327947497368, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.018474847078323364, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.012457475997507572, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.012187951244413853, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.01192346028983593, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.007637436036020517, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.5.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.18977555632591248, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.16605785489082336, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.15729843080043793, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.12766696512699127, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.08843953162431717, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.07926163077354431, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.1047520861029625, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.09524224698543549, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.09126793593168259, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.07068421691656113, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.06224825978279114, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.05397452041506767, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.04651586711406708, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.04343488812446594, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.04267938435077667, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.027367452159523964, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.02388942800462246, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.023641908541321754, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.020644178614020348, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.020169053226709366, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.015494232065975666, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.016768505796790123, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.014570490457117558, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.013225171715021133, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.5.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.17905113101005554, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.1688847690820694, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.16577595472335815, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.15133826434612274, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.08377257734537125, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.08029316365718842, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.0926627516746521, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.08575055003166199, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.08483213931322098, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.0760715901851654, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.07251891493797302, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.04720088467001915, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.04103048890829086, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.04015592858195305, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.03994781896471977, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.0235860887914896, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.020848795771598816, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.020790472626686096, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.019315937533974648, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.019183870404958725, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.012531474232673645, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.012839678674936295, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.012243560515344143, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.008893257938325405, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.5.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.22904805839061737, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.21611852943897247, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.2122150957584381, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.19375427067279816, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.10704877972602844, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.10264569520950317, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.11837033182382584, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.10951662808656693, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.10841488093137741, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.09726230800151825, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.09262924641370773, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.060047540813684464, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.05222579091787338, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.051110658794641495, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.05085025727748871, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.029983490705490112, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.025979220867156982, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.02591274492442608, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.02395925670862198, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.02379201166331768, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.015527683310210705, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.015172775834798813, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.01514635793864727, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.009450328536331654, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.5.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1762188946759258, + "total_bits": 154030336.0, + "err": 0.22586841881275177, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.20495392382144928, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.19736789166927338, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7294596354166667, + "total_bits": 193188096.0, + "err": 0.17628462612628937, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.10377688705921173, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7294596354166667, + "total_bits": 263966976.0, + "err": 0.09604892879724503, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.11959312856197357, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.11014226078987122, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1762188946759258, + "total_bits": 224809216.0, + "err": 0.10641510039567947, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.525755931712963, + "total_bits": 249549056.0, + "err": 0.09067627042531967, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.08552595227956772, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.060904473066329956, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.05277036502957344, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.04989292845129967, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.049197763204574585, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.030583109706640244, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.026112353429198265, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3289966724537035, + "total_bits": 377180416.0, + "err": 0.02587984688580036, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525755931712963, + "total_bits": 391106816.0, + "err": 0.023557987064123154, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.731774450231481, + "total_bits": 405688576.0, + "err": 0.023096522316336632, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.016536056995391846, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.01667766273021698, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.015620380640029907, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.011690924875438213, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.6.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.083217091858387, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.07724177837371826, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.07488688826560974, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.06745633482933044, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.038537587970495224, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.03624117746949196, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.04394640401005745, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.04064439237117767, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.03918943181633949, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.03435796499252319, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.03252078592777252, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.02231944352388382, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.019441476091742516, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.018504582345485687, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.01827792078256607, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.011172079481184483, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.009631028398871422, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.00954418070614338, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.008803728967905045, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.008664164692163467, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.005899702664464712, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.006059668958187103, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.005587911698967218, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.004184795543551445, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.6.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.06864383071660995, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.06377817690372467, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.06171172857284546, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.05562780797481537, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.03180219978094101, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.029865257441997528, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.03674774989485741, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.033743713051080704, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.032341938465833664, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.028392929583787918, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.027022501453757286, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.018653223291039467, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.016114376485347748, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.015253519639372826, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.015047661028802395, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.009330620989203453, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.007891536690294743, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.007803373038768768, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.007211761083453894, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.00707638543099165, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.0048910388723015785, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.004913287702947855, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.004601228982210159, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.0032892581075429916, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.6.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.19779245555400848, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.18476873636245728, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.18058723211288452, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.16326220333576202, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.09235808253288269, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.08768898248672485, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.10344153642654419, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.09544169157743454, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.09376882761716843, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.08273579180240631, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.0782870277762413, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.05252334102988243, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.0455169752240181, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.044110532850027084, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.04378077760338783, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.02618890255689621, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.022430386394262314, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.0223208200186491, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.020449787378311157, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.020230863243341446, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.013483796268701553, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.013188331387937069, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.01295731496065855, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.008172649890184402, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.6.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.1801692247390747, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.1516251415014267, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.1393575519323349, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.11572219431400299, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.0828162208199501, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.07015655934810638, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.10222913324832916, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.0933559387922287, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.0864284560084343, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.06480314582586288, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.058531031012535095, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.052990589290857315, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.0453910306096077, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.040599897503852844, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.03942622244358063, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.026721568778157234, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.022317728027701378, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.02187768928706646, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.019049789756536484, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.018277643248438835, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.01499917358160019, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.01592998579144478, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.01351308822631836, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.012322782538831234, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.6.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.17898240685462952, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.16863644123077393, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.16552534699440002, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.15089574456214905, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.08379533141851425, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.08023678511381149, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.09269330650568008, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.08589284121990204, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.08487701416015625, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.07593929022550583, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.0722646713256836, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.047124650329351425, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.04102562740445137, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.040091801434755325, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.03987607732415199, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.023518892005085945, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.020605918020009995, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.020542947575449944, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.019012214615941048, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.018876003101468086, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.01231932919472456, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.012400302104651928, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.012008736841380596, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.008214163593947887, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.6.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.23101718723773956, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.21775685250759125, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.2137337625026703, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.19494880735874176, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.10815809667110443, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.10356330126523972, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.11964914947748184, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.11076467484235764, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.10954414308071136, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.09805665910243988, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.09331510961055756, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.060742590576410294, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.05283253639936447, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.051649872213602066, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.051372136920690536, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.030297139659523964, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.026258857920765877, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.02618267945945263, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.02417350746691227, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.02399376966059208, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.01568874530494213, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.015356467105448246, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.015284298919141293, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.009560600854456425, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.6.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1762188946759258, + "total_bits": 154030336.0, + "err": 0.2224113792181015, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.19949446618556976, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.19083508849143982, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7294596354166667, + "total_bits": 193188096.0, + "err": 0.16960997879505157, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.1018945723772049, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7294596354166667, + "total_bits": 263966976.0, + "err": 0.09317406266927719, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.11890022456645966, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.10941693186759949, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1762188946759258, + "total_bits": 224809216.0, + "err": 0.10485586524009705, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.525755931712963, + "total_bits": 249549056.0, + "err": 0.08787563443183899, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.08288867026567459, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.060467250645160675, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.05241012200713158, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.04902999475598335, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.04820738732814789, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.030298206955194473, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.025747884064912796, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3289966724537035, + "total_bits": 377180416.0, + "err": 0.025460202246904373, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525755931712963, + "total_bits": 391106816.0, + "err": 0.023029277101159096, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.731774450231481, + "total_bits": 405688576.0, + "err": 0.02248183637857437, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.016327280551195145, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.016650471836328506, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.015256011858582497, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.011736223474144936, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.7.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.08486171066761017, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.07893215864896774, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.07658407837152481, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.06899373978376389, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.03921035677194595, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.03692341223359108, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.04470531642436981, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.04130293428897858, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.03984946385025978, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.035015542060136795, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.03317410498857498, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.022676018998026848, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.01972518488764763, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.01879001036286354, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.018567116931080818, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.011340592056512833, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.009703864343464375, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.009613189846277237, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.008860285393893719, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.00871727429330349, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.005931334104388952, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.00599268451333046, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.0056120725348591805, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.003999749664217234, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.7.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.07095687836408615, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.06603436172008514, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.06393452733755112, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.0576782189309597, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.03282616659998894, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.030841834843158722, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.03773188963532448, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.034828122705221176, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.033357199281454086, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.02934054657816887, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.02787485532462597, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.0191416647285223, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.016620894894003868, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.015724075958132744, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.015505462884902954, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.009566535241901875, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.008114251308143139, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.008024323731660843, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.007414845749735832, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.007278106641024351, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.004995274357497692, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.005022566765546799, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.004695298615843058, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.0033280369825661182, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.7.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.18780522048473358, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.17558133602142334, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.1715547740459442, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.15528295934200287, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.08754219114780426, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.08314202725887299, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.09824357181787491, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.09045996516942978, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.08885889500379562, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.07847452163696289, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.07437889277935028, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.04989171028137207, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.04315263032913208, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.041804324835538864, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.04149175435304642, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.024893682450056076, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.021258395165205002, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.021151509135961533, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.019388461485505104, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.019183438271284103, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.012813473120331764, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.012501494958996773, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.012287609279155731, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.007749432232230902, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.7.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.20445460081100464, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.17214299738407135, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.16084249317646027, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.13447923958301544, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.09358037263154984, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.08116224408149719, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.11188827455043793, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.10216904431581497, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.09772714227437973, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.07366430759429932, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.06695562601089478, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.05777006596326828, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05002473667263985, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.04621409997344017, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.04527943953871727, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.029389917850494385, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.02581612952053547, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.025558698922395706, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.02226189151406288, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.021679406985640526, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.01691444031894207, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.018603118136525154, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.01579047180712223, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.014976892620325089, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.7.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.16518868505954742, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.1556089222431183, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.15263032913208008, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.13917940855026245, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.07729338854551315, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.07393135875463486, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.0858280137181282, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.07928632944822311, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.07828651368618011, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.0700395405292511, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.06675292551517487, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.043609488755464554, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.03795258328318596, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.03704896196722984, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.036842454224824905, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.021856362000107765, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.019187772646546364, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.019131112843751907, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.017736021429300308, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.017603933811187744, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.011623257771134377, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.011770812794566154, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.011329087428748608, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.008071614429354668, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.7.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.22386407852172852, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.21107403934001923, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.2071087807416916, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.18898072838783264, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.10497206449508667, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.10052475333213806, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.1164190024137497, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.10758072882890701, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.10632884502410889, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.09513969719409943, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.0907936617732048, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.059277307242155075, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.05146683752536774, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.050295889377593994, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.050014618784189224, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.02962706983089447, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.025976821780204773, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.025899961590766907, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.02399996854364872, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.023829635232686996, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.015659494325518608, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.015813453122973442, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.015276151709258556, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010707719251513481, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.7.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1762188946759258, + "total_bits": 154030336.0, + "err": 0.21481235325336456, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.19134411215782166, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.1826314777135849, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7294596354166667, + "total_bits": 193188096.0, + "err": 0.161358043551445, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.098166324198246, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7294596354166667, + "total_bits": 263966976.0, + "err": 0.08940081298351288, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.11592934280633926, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.10565782338380814, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1762188946759258, + "total_bits": 224809216.0, + "err": 0.10121306777000427, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.525755931712963, + "total_bits": 249549056.0, + "err": 0.08400103449821472, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.07905909419059753, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.058885082602500916, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.050886306911706924, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.04751673713326454, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.04669838771224022, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.02982734516263008, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.02545495703816414, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3289966724537035, + "total_bits": 377180416.0, + "err": 0.025177080184221268, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525755931712963, + "total_bits": 391106816.0, + "err": 0.022775985300540924, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.731774450231481, + "total_bits": 405688576.0, + "err": 0.022243773564696312, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.016694897785782814, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.017079945653676987, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.015657220035791397, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.0127195343375206, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.8.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.0999458059668541, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.09305307269096375, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.09038392454385757, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.0814385786652565, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.04622037336230278, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.04358900710940361, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.05266820639371872, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.04854058101773262, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.04696691781282425, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.04129170626401901, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.03914712741971016, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.026678452268242836, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.02317645214498043, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.02214798703789711, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.021901601925492287, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.013347934000194073, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.011447995901107788, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.011353068985044956, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.01045875996351242, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.010303680785000324, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.006999694276601076, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.007076325826346874, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.0066500757820904255, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.0047492170706391335, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.8.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.08042289316654205, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.07493285089731216, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.07264040410518646, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.06545735895633698, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.03722529485821724, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.035042110830545425, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.042704589664936066, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.03937758505344391, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.03781897947192192, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.03328108787536621, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.03161865472793579, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.02165103331208229, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.01879194937646389, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.017832139506936073, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.017606332898139954, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.010819326154887676, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.009209849871695042, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.009107664227485657, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.008415444754064083, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.008268865756690502, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.005672765430063009, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.005699898581951857, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.005361505318433046, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.00378821836784482, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.8.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.2011396288871765, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.18810321390628815, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.1838153898715973, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.16609731316566467, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.09376338869333267, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.08906655758619308, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.10543869435787201, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.09685146808624268, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.09515978395938873, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.08399976789951324, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.07972744107246399, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.05355753377079964, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.046182312071323395, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.04476763308048248, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.044428836554288864, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.026710795238614082, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.022752152755856514, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.02264145389199257, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.02073698304593563, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.020517010241746902, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.013763797469437122, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.013363485224545002, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.013188272714614868, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.00826133880764246, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.8.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.18403257429599762, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.16395337879657745, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.15676438808441162, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.13298627734184265, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.0855785682797432, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.07823152095079422, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.09918826073408127, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.09127715229988098, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.08815546333789825, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.07119527459144592, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.06512413173913956, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.05149253457784653, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.044671058654785156, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.04210247844457626, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.04148039221763611, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.026240838691592216, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.023306146264076233, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.023117227479815483, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.020694740116596222, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.020313167944550514, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.015164799988269806, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.01645873300731182, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.01443630363792181, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.013178354129195213, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.8.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.153326153755188, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.14442405104637146, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.14158082008361816, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.12891529500484467, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.07204418629407883, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.06889242678880692, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.08006244897842407, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.07399441301822662, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.0729978084564209, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.06518666446208954, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.062116775661706924, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.04089735448360443, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.03547960892319679, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.0346161350607872, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.03440674766898155, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.02042528986930847, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.01807451993227005, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.018013970926404, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.01671650819480419, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.016591329127550125, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.01096758060157299, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.011314848437905312, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.010688532143831253, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.008001002483069897, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.8.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.22087885439395905, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.20808525383472443, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.20411288738250732, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.1860351860523224, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.10353761166334152, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.09902770817279816, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.11522113531827927, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.10616905242204666, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.10490848869085312, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.09369228780269623, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.08930615335702896, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.05856424570083618, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.0506829172372818, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.04949622601270676, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.0492163822054863, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.02926003932952881, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.025309816002845764, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.02523140422999859, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.023301903158426285, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.02311554364860058, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.015461902134120464, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.015030820854008198, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.01506965421140194, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.009661510586738586, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.8.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1762188946759258, + "total_bits": 154030336.0, + "err": 0.2144084870815277, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.19346743822097778, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.18560875952243805, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7294596354166667, + "total_bits": 193188096.0, + "err": 0.16534337401390076, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.09842947125434875, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7294596354166667, + "total_bits": 263966976.0, + "err": 0.09054169803857803, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.11477017402648926, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.10524067282676697, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1762188946759258, + "total_bits": 224809216.0, + "err": 0.10117153823375702, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.525755931712963, + "total_bits": 249549056.0, + "err": 0.08542735874652863, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.08063990622758865, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.05842892825603485, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.050554439425468445, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.047494519501924515, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.0467463955283165, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.029354242607951164, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.02517377957701683, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3289966724537035, + "total_bits": 377180416.0, + "err": 0.02491469867527485, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525755931712963, + "total_bits": 391106816.0, + "err": 0.022672735154628754, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.731774450231481, + "total_bits": 405688576.0, + "err": 0.022187015041708946, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.016034988686442375, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.01652827300131321, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.01504599116742611, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.012004869990050793, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.9.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.10389009118080139, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.09686729311943054, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.09408215433359146, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.08497390151023865, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.04829109087586403, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.04557204991579056, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.05494982749223709, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.05070514976978302, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.0490388385951519, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.043215371668338776, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.041014183312654495, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.027987278997898102, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.02435164898633957, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.023265855386853218, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.02300192043185234, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.014029878191649914, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.012239590287208557, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.012136504054069519, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.011247099377214909, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.011088797822594643, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.007489357143640518, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.00785716064274311, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.00712699955329299, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005628722719848156, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.9.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.08749889582395554, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.08163429796695709, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.07923588901758194, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.07158859819173813, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.0406680703163147, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.03832513466477394, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.04653910547494888, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.04287192225456238, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.04129951819777489, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.036416973918676376, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.03464815020561218, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.023645823821425438, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.020493533462285995, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.01950485073029995, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.019266555085778236, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.011823328211903572, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.010082578286528587, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.00998249277472496, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.009229163639247417, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.00907891895622015, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.006202543154358864, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.006241742521524429, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.005864002276211977, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.0041815126314759254, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.9.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.20794029533863068, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.19472798705101013, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.19032399356365204, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.17231979966163635, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.09721577167510986, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.09236878156661987, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.10933224856853485, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.10044476389884949, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.0985940620303154, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.08721107989549637, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.08275879174470901, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.055563583970069885, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.04795469343662262, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.04646367207169533, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.04610155522823334, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.0277392640709877, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.023660380393266678, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.023542840033769608, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.021602336317300797, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.021384136751294136, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.014331649988889694, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.013971930369734764, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.013739374466240406, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.008737990632653236, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.9.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.21059349179267883, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.1794869303703308, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.16696114838123322, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.13618913292884827, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.09666718542575836, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.08451738953590393, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.11925476789474487, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.10857442766427994, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.10076811164617538, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.07632825523614883, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.06879197061061859, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.061216048896312714, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.053286951035261154, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.04799947142601013, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.046686165034770966, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.031162485480308533, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.027218438684940338, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.026771949604153633, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.023688992485404015, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.022896546870470047, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.01804930344223976, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.02022923156619072, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.016499904915690422, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.016502900049090385, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.9.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.17818805575370789, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.1678045392036438, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.16454802453517914, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.14980433881282806, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.08374688029289246, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.0800468772649765, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.09292284399271011, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.08600349724292755, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.08483124524354935, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.07575495541095734, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.07205391675233841, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.04742920771241188, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.041260719299316406, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.040235165506601334, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.03999411314725876, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.023703383281826973, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.020964236930012703, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.02089657634496689, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.019378909841179848, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.01923166960477829, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.012595812790095806, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.013052950613200665, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.012255021370947361, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.00915750302374363, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.9.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.23378147184848785, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.22025029361248016, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.21605268120765686, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.19681772589683533, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.10968539863824844, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.10488095134496689, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.1217254251241684, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.11252998560667038, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.11113238334655762, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.09920348972082138, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.09441058337688446, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.06184568256139755, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.053729455918073654, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.05244375020265579, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.052131105214357376, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.03089931234717369, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.0267366673797369, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.02664610743522644, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.02458450198173523, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.024388374760746956, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.016089823096990585, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.015752434730529785, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.015650589019060135, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.009953116998076439, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.9.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1762188946759258, + "total_bits": 154030336.0, + "err": 0.23872779309749603, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.2141343504190445, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.20463959872722626, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7294596354166667, + "total_bits": 193188096.0, + "err": 0.18112210929393768, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.10986664146184921, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7294596354166667, + "total_bits": 263966976.0, + "err": 0.10033821314573288, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.12847726047039032, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.11808939278125763, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1762188946759258, + "total_bits": 224809216.0, + "err": 0.11307355016469955, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.525755931712963, + "total_bits": 249549056.0, + "err": 0.09446900337934494, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.08881882578134537, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.06573013216257095, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.056896746158599854, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.053190071135759354, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.05228985473513603, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.03311997279524803, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.028427964076399803, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3289966724537035, + "total_bits": 377180416.0, + "err": 0.028115948662161827, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525755931712963, + "total_bits": 391106816.0, + "err": 0.025502372533082962, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.731774450231481, + "total_bits": 405688576.0, + "err": 0.0249183252453804, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.018261689692735672, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.019005892798304558, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.017070116475224495, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.014103307388722897, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.10.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.11422240734100342, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.10639255493879318, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.10343839228153229, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.0932815745472908, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.0530124194920063, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.0500391349196434, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.06017662584781647, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.05557143688201904, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.053832948207855225, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.04738445580005646, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.044857222586870193, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.03056834079325199, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.02658846415579319, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.025429178029298782, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.025152895599603653, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.015290815383195877, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.013193521648645401, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.013084616512060165, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.012078081257641315, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.011904984712600708, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.008052541874349117, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.008224699646234512, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.007662835530936718, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005616458598524332, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.10.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.09214432537555695, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.08587214350700378, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.08326935768127441, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.07508070766925812, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.042727112770080566, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.040222667157649994, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.04894091933965683, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.045178331434726715, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.04339348152279854, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.03821837157011032, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.03625357523560524, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.02480325661599636, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.021575774997472763, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.020482005551457405, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.020214350894093513, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.012414306402206421, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.010579155758023262, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.010467508807778358, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.009675174951553345, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.009509016759693623, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.006509722210466862, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.006555845960974693, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.006154337432235479, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.0043714954517781734, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.10.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.20865514874458313, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.19499604403972626, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.1903030276298523, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.17202739417552948, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.09733307361602783, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.09231357276439667, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.10952883213758469, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.10099715739488602, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.0988042801618576, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.0871918722987175, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.08249787986278534, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.05560761317610741, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.04818461462855339, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.04650436341762543, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.046120256185531616, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.02773503214120865, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.02370677888393402, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.023566853255033493, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.02161305770277977, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.021359043195843697, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.014327922835946083, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.01406027190387249, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.013710811734199524, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.008817861787974834, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.10.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.20625193417072296, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.18207243084907532, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.17352895438671112, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.1513231098651886, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.09564477205276489, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.0860319584608078, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.11178965866565704, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.1026918962597847, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.0989641398191452, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.08049339801073074, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.07375138998031616, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.05775400996208191, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05005786940455437, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.04683489724993706, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.04606631398200989, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.02919062227010727, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.025675443932414055, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.025436265394091606, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.02284896746277809, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.022376470267772675, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.016419513151049614, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.017866207286715508, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.01547442190349102, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.014001318253576756, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.10.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.149992436170578, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.14099346101284027, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.13816186785697937, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.125713512301445, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.07052326202392578, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.06734989583492279, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.07838898152112961, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.07251302152872086, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.07147091627120972, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.0636911541223526, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.060599185526371, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.04006200283765793, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.034847550094127655, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.033960696309804916, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.033751942217350006, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.02007092349231243, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.017818253487348557, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.017758971080183983, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.016481710597872734, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.016349762678146362, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.010799190029501915, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.011270611546933651, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.010507823899388313, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.008107634261250496, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.10.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.22289837896823883, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.2097025215625763, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.20562826097011566, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.18711799383163452, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.10448689013719559, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.0998278483748436, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.11601949483156204, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.10722707957029343, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.10587097704410553, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.09435703605413437, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.08972244709730148, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.05885891988873482, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.05120245739817619, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.04996231570839882, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.04966162145137787, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.029451683163642883, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.025488736107945442, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.025401795282959938, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.023406771942973137, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.023217905312776566, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.015380214899778366, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.015048540197312832, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.01495671272277832, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.009549137204885483, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.10.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1762188946759258, + "total_bits": 154030336.0, + "err": 0.2199844866991043, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.20016010105609894, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.19296559691429138, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7294596354166667, + "total_bits": 193188096.0, + "err": 0.17187926173210144, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.10130026936531067, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7294596354166667, + "total_bits": 263966976.0, + "err": 0.09390879422426224, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.11624006181955338, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.10731702297925949, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1762188946759258, + "total_bits": 224809216.0, + "err": 0.10373794287443161, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.525755931712963, + "total_bits": 249549056.0, + "err": 0.08847663551568985, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.08330860733985901, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.059340205043554306, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.051521580666303635, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.04880155622959137, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.04813603684306145, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.029801782220602036, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.025746552273631096, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3289966724537035, + "total_bits": 377180416.0, + "err": 0.025518666952848434, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525755931712963, + "total_bits": 391106816.0, + "err": 0.023265233263373375, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.731774450231481, + "total_bits": 405688576.0, + "err": 0.022829586640000343, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.016206789761781693, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.016691861674189568, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.015361536294221878, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.012020367197692394, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.11.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.1036330834031105, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.09666847437620163, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.09388869255781174, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.08464208245277405, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.04816151410341263, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.04541551321744919, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.05488090589642525, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.05066361278295517, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.04891147091984749, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.043066177517175674, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.0408158153295517, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.027876274660229683, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.02421611174941063, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.02308671735227108, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.022812534123659134, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.013936690986156464, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.011930638924241066, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.011821391992270947, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.010909429751336575, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.010738389566540718, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.007306591607630253, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.007374827284365892, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.006923340726643801, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.004935332573950291, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.11.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.0862126499414444, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.08048422634601593, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.07790391147136688, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.07032039761543274, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.04007713869214058, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.03771211579442024, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.04599275812506676, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.04253602772951126, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.040700532495975494, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.035881560295820236, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.0340627059340477, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.023330992087721825, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.020299412310123444, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.019202785566449165, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.018935728818178177, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.011657553724944592, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.009897513315081596, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.00978227611631155, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.00904877670109272, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.008880635723471642, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.0061072492972016335, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.006111769936978817, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.005753844510763884, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.004018615931272507, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.11.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.21326544880867004, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.19940046966075897, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.1945198029279709, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.17578905820846558, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.09966813027858734, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.09449859708547592, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.11262395232915878, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.10359285026788712, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.1011640802025795, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.08931667357683182, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.08453686535358429, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.05716843158006668, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.04942553862929344, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.04766145348548889, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.047228820621967316, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.028534626588225365, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.024315612390637398, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.024161940440535545, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.022183334454894066, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.021909035742282867, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.014744431711733341, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.01447254978120327, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.014083041809499264, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.00912491325289011, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.11.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.21521466970443726, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.18504121899604797, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.17043066024780273, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.14663109183311462, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.09920943528413773, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.08544383198022842, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.12386870384216309, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.11381825804710388, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.10388865321874619, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.08169376105070114, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.07488565146923065, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06423763185739517, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05552331730723381, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.04881563037633896, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.04714800789952278, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.03231135755777359, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.027087882161140442, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.02646603249013424, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.02392195723950863, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.02288259193301201, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.017880938947200775, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.01972338557243347, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.015773441642522812, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.015385757200419903, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.11.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.1787686049938202, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.1681070625782013, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.16464270651340485, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.1497116982936859, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.08416017889976501, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.08032762259244919, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.09375754743814468, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.0866406038403511, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.08529099822044373, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.07596082985401154, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.07228922098875046, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.04790946841239929, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.0416267029941082, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.04051114618778229, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.04024394601583481, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.023922409862279892, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.021268032491207123, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.021194832399487495, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.019666971638798714, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.0195038802921772, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.012862388044595718, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.013484236784279346, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.012497478164732456, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.009700984694063663, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.11.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.23320236802101135, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.21925358474254608, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.21489092707633972, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.1953689157962799, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.109418123960495, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.10448715090751648, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.12180136144161224, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.1123972162604332, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.11088006943464279, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.09872633963823318, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.09380266070365906, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.06181085854768753, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.05363484099507332, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.052284982055425644, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.0519598014652729, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.030793575569987297, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.026667894795536995, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.026572316884994507, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.024473557248711586, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.02426844649016857, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.01600692979991436, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.015736864879727364, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.015535963699221611, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.009941518306732178, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.11.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1762188946759258, + "total_bits": 154030336.0, + "err": 0.24735558032989502, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.2218373715877533, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.2122281789779663, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7294596354166667, + "total_bits": 193188096.0, + "err": 0.1868770569562912, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.11411282420158386, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7294596354166667, + "total_bits": 263966976.0, + "err": 0.10428869724273682, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.13294853270053864, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.1222270280122757, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1762188946759258, + "total_bits": 224809216.0, + "err": 0.11739195138216019, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.525755931712963, + "total_bits": 249549056.0, + "err": 0.09766409546136856, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.09143059700727463, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.06811169534921646, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.05879444628953934, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.05508147180080414, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.0541812889277935, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.03428894281387329, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.029171334579586983, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3289966724537035, + "total_bits": 377180416.0, + "err": 0.0288708359003067, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525755931712963, + "total_bits": 391106816.0, + "err": 0.026002591475844383, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.731774450231481, + "total_bits": 405688576.0, + "err": 0.02541843242943287, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.01897542178630829, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.019157804548740387, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.017840033397078514, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.013904614374041557, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.12.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.11591119319200516, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.10812371224164963, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.10508312284946442, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.09474065899848938, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.05384932830929756, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.05084693059325218, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.061120860278606415, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.05650690197944641, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.05468951165676117, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.04814814776182175, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.045579005032777786, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.031026216223835945, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.026973441243171692, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.025789624080061913, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.0255027674138546, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.015513181686401367, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.013271600008010864, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.01315381657332182, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.012117797508835793, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.011938363313674927, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.008098508231341839, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.008107728324830532, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.007695189211517572, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005323934368789196, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.12.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.09532671421766281, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.08900249004364014, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.0862538143992424, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.07783587276935577, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.04432147368788719, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.041737813502550125, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.05072375386953354, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.04691065847873688, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.04500065743923187, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.03968735411763191, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.03761833906173706, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.02570526860654354, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.022398265078663826, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.021226197481155396, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.02094244584441185, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.012850308790802956, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.010926044546067715, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.010803919285535812, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.009986319579184055, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.009809446521103382, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.006714956369251013, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.006713842507451773, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.006339370273053646, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.0043960390612483025, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.12.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.21647608280181885, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.20224158465862274, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.19714748859405518, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.17803041636943817, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.10107249021530151, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.0957268476486206, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.11405908316373825, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.10513962060213089, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.1025804728269577, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.09050127118825912, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.08561646938323975, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.05795956403017044, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.050245534628629684, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.04836973175406456, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.04791872203350067, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.028976185247302055, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.024740982800722122, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.024573327973484993, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.02256726287305355, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.022278085350990295, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.015065197832882404, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.014830735512077808, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.014389006420969963, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.009473065845668316, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.12.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.2284284234046936, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.20274272561073303, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.1928010880947113, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.16494165360927582, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.10654076933860779, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.09634043276309967, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.12561142444610596, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.11491590738296509, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.10942524671554565, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.08806803077459335, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.0816456750035286, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06450321525335312, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.055909957736730576, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.05216975882649422, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.05126192420721054, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.032542336732149124, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.028553137555718422, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.028228584676980972, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.02516074664890766, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.02459680289030075, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.01819680631160736, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.019905949011445045, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.017061203718185425, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.015561510808765888, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.12.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.17767244577407837, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.16695454716682434, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.16345791518688202, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.14851200580596924, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.08357995748519897, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.07974174618721008, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.09309285134077072, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.08605830371379852, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.08473748713731766, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.07539546489715576, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.07169341295957565, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.047565825283527374, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.041401877999305725, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.04030110687017441, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.04003380984067917, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.023821750655770302, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.02121513895690441, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.021136384457349777, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.019609306007623672, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.019452547654509544, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.01281677931547165, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.013515742495656013, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.01246075052767992, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.009804731234908104, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.12.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.23284199833869934, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.21880865097045898, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.2144269347190857, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.19480346143245697, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.10921501368284225, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.10420352220535278, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.12139173597097397, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.11218902468681335, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.1106705591082573, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.09846216440200806, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.09344761073589325, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.06171238422393799, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.053560152649879456, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.05220627412199974, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.05188017711043358, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.030811795964837074, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.026594363152980804, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.026497066020965576, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.02438262477517128, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.024174999445676804, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.01598597876727581, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.01564609259366989, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.01551087386906147, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.009830521419644356, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.12.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1762188946759258, + "total_bits": 154030336.0, + "err": 0.24991069734096527, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.22417351603507996, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.21497775614261627, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7294596354166667, + "total_bits": 193188096.0, + "err": 0.18914714455604553, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.11520688980817795, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7294596354166667, + "total_bits": 263966976.0, + "err": 0.10553355515003204, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.13342422246932983, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.12261050939559937, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1762188946759258, + "total_bits": 224809216.0, + "err": 0.11841796338558197, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.525755931712963, + "total_bits": 249549056.0, + "err": 0.09857316315174103, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.09214597195386887, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.0682259276509285, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.059028610587120056, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.0556429959833622, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.054833538830280304, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.03446141630411148, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.029509276151657104, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3289966724537035, + "total_bits": 377180416.0, + "err": 0.02925737202167511, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525755931712963, + "total_bits": 391106816.0, + "err": 0.026311280205845833, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.731774450231481, + "total_bits": 405688576.0, + "err": 0.025778878480196, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.01904844306409359, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.0193710345774889, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.018038712441921234, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.014156115241348743, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.13.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.12673595547676086, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.1180826798081398, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.11478379368782043, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.10352820158004761, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.058961737900972366, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.05563805252313614, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.06683836132287979, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.06175490841269493, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.059859514236450195, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.05266899988055229, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.04987417161464691, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.033981453627347946, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.029577717185020447, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.02829248644411564, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.02799006551504135, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.017003484070301056, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.01468441542237997, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.014568349346518517, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.013434630818665028, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.013245880603790283, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.008954917080700397, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.009155862033367157, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.008525888435542583, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.006255714688450098, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.13.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.10177292674779892, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.0949268490076065, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.09183517843484879, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.082797572016716, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.047260962426662445, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.04440389573574066, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.05435195565223694, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.05028458312153816, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.047985199838876724, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.04227614402770996, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.04010147601366043, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.027545452117919922, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.023999087512493134, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.022649018093943596, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.022325856611132622, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.01377651747316122, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.011681860312819481, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.011540978215634823, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.010682894848287106, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.01047682948410511, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.007203561253845692, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.007234831340610981, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.006778327282518148, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.004773877095431089, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.13.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.22761976718902588, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.21255502104759216, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.20735859870910645, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.18720415234565735, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.10634046792984009, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.10076601058244705, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.11984169483184814, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.11041098833084106, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.10795597732067108, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.09514149278402328, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.08998771756887436, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06088867038488388, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05277309939265251, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.05087272822856903, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.050417330116033554, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.03043922409415245, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.026021212339401245, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.025860032066702843, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.02373785525560379, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.023443736135959625, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.01580181159079075, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.015596602112054825, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.015100741758942604, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.009976974688470364, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.13.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.2378852516412735, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.2113010138273239, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.20235145092010498, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.17123451828956604, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.1109272763133049, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.10107570886611938, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.12954819202423096, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.11803029477596283, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.11424132436513901, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.09213875979185104, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.08433971554040909, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06705125421285629, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05766431242227554, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.054430730640888214, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.053672995418310165, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.0340331569314003, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.029880288988351822, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.029649879783391953, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.0264156311750412, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.025937402620911598, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.01944946125149727, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.020797472447156906, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.018547652289271355, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.01639224775135517, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.13.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.1723615676164627, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.16177432239055634, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.15834543108940125, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.14377254247665405, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.08115054666996002, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.07737130671739578, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.09037738293409348, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.08354976028203964, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.08229266852140427, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.07310491055250168, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.06941858679056168, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.04620828852057457, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.040217623114585876, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.03913111612200737, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.038876961916685104, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.023154040798544884, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.020597606897354126, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.020519055426120758, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.019021373242139816, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.018867801874876022, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.012485731393098831, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.013120739720761776, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.012134524993598461, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.00952265877276659, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.13.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.2354280650615692, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.22111919522285461, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.21670925617218018, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.1966915875673294, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.11044253408908844, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.10540667176246643, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.12279949337244034, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.11347794532775879, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.11199178546667099, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.09950228780508041, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.09444773197174072, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.06242845207452774, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.05419296398758888, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.05282970517873764, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.052502553910017014, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.031191375106573105, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.026926908642053604, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.026832804083824158, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.024661971256136894, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.024456461891531944, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.016223065555095673, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.015866942703723907, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.015761159360408783, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010002766735851765, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.13.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1762188946759258, + "total_bits": 154030336.0, + "err": 0.24317915737628937, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.218808114528656, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.2096937745809555, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7294596354166667, + "total_bits": 193188096.0, + "err": 0.18524956703186035, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.11198809742927551, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7294596354166667, + "total_bits": 263966976.0, + "err": 0.10262554883956909, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.13037633895874023, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.11960862576961517, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1762188946759258, + "total_bits": 224809216.0, + "err": 0.1150960922241211, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.525755931712963, + "total_bits": 249549056.0, + "err": 0.0962689146399498, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.0902659147977829, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.06631635129451752, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.05742136016488075, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.053997885435819626, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.05317830666899681, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.033290762454271317, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.02850426733493805, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3289966724537035, + "total_bits": 377180416.0, + "err": 0.028221553191542625, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525755931712963, + "total_bits": 391106816.0, + "err": 0.025451654568314552, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.731774450231481, + "total_bits": 405688576.0, + "err": 0.024899663403630257, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.018031353130936623, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.018543371930718422, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.01694018393754959, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.013299399055540562, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.14.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.13035115599632263, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.12166782468557358, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.11834284663200378, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.10681058466434479, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.06068338081240654, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.05734146013855934, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.0687764436006546, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.0635424479842186, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.061609357595443726, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.05429590120911598, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.05144995078444481, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.03491302579641342, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.030373651534318924, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.02906760759651661, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.02874985709786415, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.017456116154789925, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.01497328095138073, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.01485045999288559, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.013685831800103188, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.013489972800016403, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.009104037657380104, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.009169217199087143, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.008659010753035545, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.0060639348812401295, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.14.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.10360167175531387, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.09674623608589172, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.09366432577371597, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.08459970355033875, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.048189081251621246, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.04535103216767311, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.0552515871822834, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.05116712674498558, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.04892893135547638, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.043175872415304184, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.04094819352030754, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.02797839604318142, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.024401869624853134, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.02307620644569397, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.0227554552257061, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.01397790014743805, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.011859814636409283, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.01171800121665001, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.010845188051462173, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.010638636536896229, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.00728078605607152, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.007278474047780037, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.006857675965875387, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.004722151439636946, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.14.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.23607565462589264, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.22075414657592773, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.21527963876724243, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.19462355971336365, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.11055507510900497, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.10474717617034912, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.12502846121788025, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.11515617370605469, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.11220370978116989, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.09914394468069077, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.093897245824337, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06353357434272766, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.0550878643989563, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.052917588502168655, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.052371323108673096, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.031747907400131226, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.02700881101191044, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.026807622984051704, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.02465803548693657, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.024324070662260056, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.016478708013892174, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.016135992482304573, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.015706880018115044, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.010183692909777164, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.14.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.2198360711336136, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.20173746347427368, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.19544737040996552, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.17499229311943054, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.10264736413955688, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.09565116465091705, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.1169072836637497, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.10717169940471649, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.1048009991645813, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.08952213823795319, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.08441709727048874, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06034189462661743, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.052294809371232986, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.050287410616874695, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.04981903359293938, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.030765140429139137, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.027510061860084534, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.02738824114203453, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.025063995271921158, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.024781955406069756, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.01768285594880581, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.018913889303803444, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.017094440758228302, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.0149557339027524, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.14.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.1672823578119278, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.15707604587078094, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.15375061333179474, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.13966181874275208, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.07880657911300659, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.07513557374477386, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.08769677579402924, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.08116035163402557, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.07989712059497833, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.0709889754652977, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.06737227737903595, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.044791340827941895, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.03898634761571884, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.037920992821455, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.03767625615000725, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.022399259731173515, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.019844528287649155, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.01977248676121235, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.018297497183084488, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.018148884177207947, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.011951437219977379, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.012485643848776817, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.011604378931224346, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.008884563110768795, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.14.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.23461183905601501, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.2204400599002838, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.21603593230247498, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.1962234377861023, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.11028178036212921, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.10521527379751205, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.1224643886089325, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.11327293515205383, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.11178621649742126, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.09933658689260483, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.09421481192111969, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.0622444674372673, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.054108377546072006, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.05274263396859169, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.05240939185023308, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.031056346371769905, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.02687353454530239, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.02677587792277336, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.0246184840798378, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.024408860132098198, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.016071245074272156, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.0158243365585804, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.015596690587699413, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.009957249276340008, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.14.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1762188946759258, + "total_bits": 154030336.0, + "err": 0.23829752206802368, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.2150087207555771, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.2064509391784668, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7294596354166667, + "total_bits": 193188096.0, + "err": 0.18296799063682556, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.10955911129713058, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7294596354166667, + "total_bits": 263966976.0, + "err": 0.10077549517154694, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.12676167488098145, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.11674437671899796, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1762188946759258, + "total_bits": 224809216.0, + "err": 0.11259892582893372, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.525755931712963, + "total_bits": 249549056.0, + "err": 0.09467802196741104, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.08890377730131149, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.06462828814983368, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.05591514706611633, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.0526292510330677, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.051844533532857895, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.032480936497449875, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.027478305622935295, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3289966724537035, + "total_bits": 377180416.0, + "err": 0.027218762785196304, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525755931712963, + "total_bits": 391106816.0, + "err": 0.024551188573241234, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.731774450231481, + "total_bits": 405688576.0, + "err": 0.024030908942222595, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.01752493344247341, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.017489315941929817, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.016491204500198364, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.01213403046131134, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.15.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.11813785135746002, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.11048111319541931, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.10746152698993683, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.09710486978292465, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.05510399863123894, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.05210031569004059, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.06258314102888107, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.057794369757175446, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.055916886776685715, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.04943796619772911, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.04689662531018257, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.0317898653447628, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.027621323242783546, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.0264037624001503, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.026111336424946785, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.01590285450220108, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.013585646636784077, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.013465771451592445, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.012436112388968468, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.012248746119439602, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.00830458290874958, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.00829900149255991, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.007899082265794277, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005455106496810913, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.15.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.09725841134786606, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.09088506549596786, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.08796261996030807, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.07948294281959534, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.04524611681699753, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.042571790516376495, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.05189840495586395, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.048110298812389374, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.04593624174594879, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.04059296101331711, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.03850197419524193, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.02629789337515831, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.022955551743507385, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.021680280566215515, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.02137099951505661, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.013146569021046162, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.011160986497998238, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.011023366823792458, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.010216227732598782, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.01002105139195919, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.0068668220192193985, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.006871395744383335, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.00646735355257988, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.004487908910959959, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.15.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.24126313626766205, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.22606077790260315, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.22059780359268188, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.19967418909072876, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.11317021399736404, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.10741632431745529, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.12776261568069458, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.11759880930185318, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.11478321254253387, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.10167606920003891, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.09640110284090042, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06494588404893875, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.056261271238327026, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.0541648305952549, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.05367502197623253, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.032469093799591064, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.027673671022057533, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.027483493089675903, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.025315651670098305, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.02498915232717991, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.016850020736455917, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.016528164967894554, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.016087325289845467, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.010466549545526505, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.15.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.22590696811676025, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.2013203203678131, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.1894170045852661, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.16358527541160583, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.104481540620327, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.0937587320804596, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.12876664102077484, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.11771534383296967, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.10851098597049713, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.08887093514204025, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.08260281383991241, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06605348736047745, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.057079967111349106, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.05100947245955467, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.04950882866978645, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.03330137953162193, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.027657639235258102, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.02706911973655224, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.024723144248127937, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.023773323744535446, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.018279695883393288, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.019323553889989853, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.016394751146435738, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.01450137235224247, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.15.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.18694372475147247, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.17560966312885284, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.17200009524822235, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.15622086822986603, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.08816865086555481, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.08408403396606445, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.09804035723209381, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.09075409919023514, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.08938799798488617, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.07945553213357925, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.07549940794706345, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.050090491771698, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.04366261139512062, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.04248698428273201, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.04221423715353012, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.025088800117373466, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.02230667881667614, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.022227635607123375, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.020596344023942947, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.020430251955986023, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.013445603661239147, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.014137977734208107, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.013068853877484798, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010178091935813427, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.15.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.24560105800628662, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.2309037744998932, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.2263212949037552, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.20552200078964233, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.11539977043867111, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.1101929172873497, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.1282258778810501, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.11852267384529114, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.11696496605873108, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.10401748865842819, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.09872318804264069, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.06515904515981674, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.05662822350859642, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.05520707368850708, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.054871816188097, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.0325586311519146, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.028166716918349266, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.028063548728823662, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.025819793343544006, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.02560216747224331, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.016952671110630035, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.016621094197034836, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.01647312380373478, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010528809390962124, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.15.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1762188946759258, + "total_bits": 154030336.0, + "err": 0.249661386013031, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.22309796512126923, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.21294018626213074, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7294596354166667, + "total_bits": 193188096.0, + "err": 0.18732783198356628, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.1148328185081482, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7294596354166667, + "total_bits": 263966976.0, + "err": 0.10454725474119186, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.13438430428504944, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.123583123087883, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1762188946759258, + "total_bits": 224809216.0, + "err": 0.11836837977170944, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.525755931712963, + "total_bits": 249549056.0, + "err": 0.09797828644514084, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.09173693507909775, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.06866218149662018, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.05934954807162285, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.0553530678153038, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.05438949912786484, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.0345640666782856, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.02914861962199211, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3289966724537035, + "total_bits": 377180416.0, + "err": 0.02880125306546688, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525755931712963, + "total_bits": 391106816.0, + "err": 0.025869030505418777, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.731774450231481, + "total_bits": 405688576.0, + "err": 0.025209594517946243, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.01880909875035286, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.0189613476395607, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.01753321662545204, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.013429547660052776, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.16.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.12992089986801147, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.12138892710208893, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.11816433817148209, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.10681387782096863, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.060634762048721313, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.057363156229257584, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.06875178962945938, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.06342557072639465, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.06152614206075668, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.05435715988278389, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.05161292105913162, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.03498202562332153, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.03034578263759613, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.029079491272568703, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.0287795327603817, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.017496349290013313, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.015043475665152073, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.014924588613212109, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.01378520205616951, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.013598097488284111, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.009193295612931252, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.00929821003228426, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.008774259127676487, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.006270274054259062, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.16.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.10629000514745712, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.09932523965835571, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.09623464941978455, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.0869513675570488, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.0494927316904068, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.046607837080955505, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.056630901992321014, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.05245191231369972, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.05023317411541939, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.04435189440846443, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.04207786172628403, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.02870136685669422, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.025027524679899216, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.023707086220383644, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.023389464244246483, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.014334668405354023, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.01220301166176796, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.012066228315234184, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.011166379787027836, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.010963973589241505, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.007494975812733173, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.007507356349378824, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.007081180345267057, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.0049142842181026936, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.16.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.24465401470661163, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.22932498157024384, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.22391965985298157, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.20282405614852905, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.1148926392197609, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.10910266637802124, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.12938080728054047, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.11912181228399277, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.11656244844198227, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.10320480912923813, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.09782727807760239, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06579919904470444, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.056967593729496, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.054979633539915085, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.05450760945677757, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.03285011649131775, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.028041312471032143, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.027868326753377914, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.02564079314470291, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.025334952399134636, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.016986621543765068, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.016664350405335426, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.016255713999271393, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.010482298210263252, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.16.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.22119158506393433, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.20057831704616547, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.19350218772888184, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.16846579313278198, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.10288269817829132, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.09487388283014297, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.1180039644241333, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.10763318091630936, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.10513745993375778, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.08729308843612671, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.07999783754348755, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.060611940920352936, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.052390214055776596, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.05022077634930611, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.049726150929927826, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.030762184411287308, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.02734524756669998, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.027200177311897278, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.024441083893179893, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.024129046127200127, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.017442036420106888, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.018691249191761017, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.016793636605143547, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.014619812369346619, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.16.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.17251622676849365, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.1620800942182541, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.15872979164123535, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.14422714710235596, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.08129481971263885, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.0775693878531456, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.09047603607177734, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.08368634432554245, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.08243146538734436, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.07324999570846558, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.06966998428106308, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.04623030871152878, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.040188249200582504, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.03911905363202095, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.038866523653268814, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.023101462051272392, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.020487679168581963, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.020415054634213448, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.018905211240053177, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.018749821931123734, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.012390629388391972, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.012925716117024422, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.012039072811603546, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.00922777596861124, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.16.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.23509475588798523, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.2210124284029007, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.2166021466255188, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.1968691647052765, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.11054039746522903, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.10552597045898438, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.12294608354568481, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.11355020850896835, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.11203375458717346, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.09964185208082199, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.09474067389965057, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.06251848489046097, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.054201312363147736, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.052853457629680634, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.05252174288034439, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.031201357021927834, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.02695022150874138, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.026855137199163437, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.0247141532599926, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.02450563572347164, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.01631690002977848, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.015902338549494743, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.015856264159083366, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010040191933512688, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.16.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1762188946759258, + "total_bits": 154030336.0, + "err": 0.23774735629558563, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.21336475014686584, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.20400910079479218, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7294596354166667, + "total_bits": 193188096.0, + "err": 0.18036508560180664, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.10936755686998367, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7294596354166667, + "total_bits": 263966976.0, + "err": 0.09986736625432968, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.12775284051895142, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.11735818535089493, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1762188946759258, + "total_bits": 224809216.0, + "err": 0.11256204545497894, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.525755931712963, + "total_bits": 249549056.0, + "err": 0.09384429454803467, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.08821718394756317, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.06519201397895813, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.05623935908079147, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.052611786872148514, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.051730677485466, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.03272217512130737, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.027569502592086792, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3289966724537035, + "total_bits": 377180416.0, + "err": 0.027262642979621887, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525755931712963, + "total_bits": 391106816.0, + "err": 0.02453724667429924, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.731774450231481, + "total_bits": 405688576.0, + "err": 0.023945024237036705, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.017771942541003227, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.017750859260559082, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.016611725091934204, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.012408253736793995, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.17.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.12612661719322205, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.11808325350284576, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.11490257829427719, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.1039545089006424, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.05884695425629616, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.05571427196264267, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.06685030460357666, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.061636101454496384, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.05969240143895149, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.05288366973400116, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.05028744414448738, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.03398962318897247, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.029460079967975616, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.028183691203594208, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.02788701467216015, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.01699250191450119, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.014505419880151749, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.014382119290530682, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.013291659764945507, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.013100654818117619, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.00886919628828764, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.008848444558680058, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.008446895517408848, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.0058016302064061165, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.17.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.10340142250061035, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.09671082347631454, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.09370078146457672, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.08470600098371506, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.04816759377717972, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.045377183705568314, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.05531032010912895, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.05114631727337837, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.048887938261032104, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.043249741196632385, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.04114283621311188, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.028031444177031517, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.02442823350429535, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.02308594435453415, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.02276318334043026, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.014004218392074108, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.011892613023519516, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.011751554906368256, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.010899313725531101, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.01069601345807314, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.007334492169320583, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.007343416102230549, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.006916959770023823, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.0048257652670145035, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.17.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.23975549638271332, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.22488035261631012, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.2197841852903366, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.19931116700172424, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.11250780522823334, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.1070244088768959, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.12657159566879272, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.11645111441612244, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.11408858001232147, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.10120036453008652, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.09609171003103256, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06432407349348068, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05562269315123558, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.05381292104721069, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.053381022065877914, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.0321221686899662, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.027440035715699196, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.02727995626628399, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.02510763332247734, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.02482355199754238, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.01659989543259144, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.01625267043709755, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.015910662710666656, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.010199973359704018, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.17.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.21662208437919617, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.1935219168663025, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.18606923520565033, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.15572378039360046, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.10146646946668625, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.09309393167495728, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.11645425856113434, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.10666361451148987, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.10395275056362152, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.08385032415390015, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.07535615563392639, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.05992794409394264, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.0517597496509552, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.04940454661846161, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.048853833228349686, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.030143266543745995, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.02657220885157585, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.0264087226241827, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.023248666897416115, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.022879725322127342, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.016670186072587967, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.017822427675127983, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.01596054621040821, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.013579433783888817, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.17.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.17597134411334991, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.16545946896076202, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.1620837301015854, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.14737027883529663, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.08294732868671417, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.0791846215724945, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.09229590743780136, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.08532093465328217, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.08407973498106003, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.0748458057641983, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.07115110754966736, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.047112446278333664, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.04092876613140106, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.03986990079283714, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.03962079808115959, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.023557474836707115, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.020791497081518173, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.020723480731248856, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.019180435687303543, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.019026173278689384, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.012647153809666634, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.012972773984074593, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.012302391231060028, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.009121776558458805, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.17.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.23802225291728973, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.22394299507141113, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.21955201029777527, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.19966860115528107, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.11194655299186707, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.10692882537841797, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.12461204826831818, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.1149340495467186, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.11344098299741745, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.10099216550588608, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.09600330144166946, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.06328333169221878, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.05487208440899849, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.05353129655122757, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.05320785939693451, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.03163382411003113, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.027334339916706085, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.02723919041454792, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.02508772723376751, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.02488226629793644, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.016662750393152237, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.016180414706468582, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.016211198642849922, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.01029482763260603, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.17.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1762188946759258, + "total_bits": 154030336.0, + "err": 0.2387976348400116, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.21408474445343018, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.2043950855731964, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7294596354166667, + "total_bits": 193188096.0, + "err": 0.18073804676532745, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.10967493802309036, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7294596354166667, + "total_bits": 263966976.0, + "err": 0.10010320693254471, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.1282712072134018, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.11821019649505615, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1762188946759258, + "total_bits": 224809216.0, + "err": 0.11303795874118805, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.525755931712963, + "total_bits": 249549056.0, + "err": 0.09422174096107483, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.08856798708438873, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.06545612215995789, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.056798648089170456, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.05290856212377548, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.05194123834371567, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.03284813091158867, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.027924206107854843, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3289966724537035, + "total_bits": 377180416.0, + "err": 0.027586955577135086, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525755931712963, + "total_bits": 391106816.0, + "err": 0.024929387494921684, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.731774450231481, + "total_bits": 405688576.0, + "err": 0.024299394339323044, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.017589695751667023, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.018272047862410545, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.01629789173603058, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.013051584362983704, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.18.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.1267344355583191, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.11888437718153, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.11577434837818146, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.10502025485038757, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.05922402814030647, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.056159231811761856, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.06715410202741623, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.06196628138422966, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.06007223576307297, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.05336404964327812, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.050799451768398285, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.03411807119846344, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.029632817953824997, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.02838665060698986, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.02808794379234314, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.01706252433359623, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.014616685919463634, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.014495360665023327, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.01342654600739479, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.013233743607997894, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.008932657539844513, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.008931510150432587, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.008528118953108788, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005895843729376793, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.18.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.10669996589422226, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.10006813704967499, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.09715160727500916, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.08804410696029663, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.04981553554534912, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.04707185551524162, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.056859396398067474, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.05260411649942398, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.050514183938503265, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.04485978186130524, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.04265749454498291, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.028829200193285942, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.02511085942387581, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.023862138390541077, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.023551534861326218, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.014410979114472866, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.01225564070045948, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.012120449915528297, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.011245573870837688, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.011054611764848232, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.007513873744755983, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.007486721035093069, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.007117245811969042, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.004860574379563332, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.18.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.2432994544506073, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.22855308651924133, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.22376173734664917, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.2032243013381958, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.11423400044441223, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.10883686691522598, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.1279277205467224, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.11791523545980453, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.1157941147685051, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.10295816510915756, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.09786069393157959, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06500311195850372, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.056329913437366486, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.05461205542087555, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.054214105010032654, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.032445114105939865, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.027744675055146217, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.02760407142341137, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.025408178567886353, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.025146055966615677, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.01668788492679596, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.016270864754915237, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.01603681966662407, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.0100199393928051, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.18.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.23854050040245056, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.21372033655643463, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.20580320060253143, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.17329315841197968, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.11152774840593338, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.10247690975666046, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.12708227336406708, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.11664026230573654, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.1142626404762268, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.09251166135072708, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.08302705734968185, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06552213430404663, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.056713949888944626, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.054385337978601456, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.053820617496967316, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.03307965397834778, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.029456080868840218, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.0293022058904171, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.025890696793794632, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.025537028908729553, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.018586255609989166, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.019933203235268593, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.01788247935473919, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.01542214211076498, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.18.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.18360430002212524, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.1726527065038681, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.1691603660583496, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.15389461815357208, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.08642292022705078, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.08250969648361206, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.09587915986776352, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.08881686627864838, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.08762333542108536, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.07802111655473709, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.07410972565412521, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.048885613679885864, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.042594220489263535, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.041516177356243134, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.041263680905103683, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.024435872212052345, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.02155221626162529, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.02148025669157505, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.01987065002322197, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.019710874184966087, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.012906273826956749, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.01330888457596302, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.012553202919661999, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.009199931286275387, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.18.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.24316062033176422, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.22879689931869507, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.22434720396995544, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.2041846215724945, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.11430758982896805, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.10923805087804794, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.12664029002189636, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.11729881167411804, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.11584514379501343, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.10323227941989899, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.09803996980190277, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.06437569111585617, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.05602096766233444, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.05466452240943909, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.05434596166014671, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.032121360301971436, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.027840444818139076, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.02775346301496029, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.025549447163939476, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.025348413735628128, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.016601959243416786, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.016361817717552185, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.016144612804055214, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010271457955241203, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.18.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1762188946759258, + "total_bits": 154030336.0, + "err": 0.24054308235645294, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.2149766981601715, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.20472249388694763, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7294596354166667, + "total_bits": 193188096.0, + "err": 0.18111827969551086, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.11037147045135498, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7294596354166667, + "total_bits": 263966976.0, + "err": 0.10023960471153259, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.1296161413192749, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.11957119405269623, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1762188946759258, + "total_bits": 224809216.0, + "err": 0.11387142539024353, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.525755931712963, + "total_bits": 249549056.0, + "err": 0.0946601927280426, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.08891303837299347, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.06596499681472778, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.05732259526848793, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.053129225969314575, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.0521182045340538, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.03302965685725212, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.027852945029735565, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3289966724537035, + "total_bits": 377180416.0, + "err": 0.027489693835377693, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525755931712963, + "total_bits": 391106816.0, + "err": 0.02477586641907692, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.731774450231481, + "total_bits": 405688576.0, + "err": 0.024098815396428108, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.017592083662748337, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.018016556277871132, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.016247617080807686, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.01255759783089161, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.19.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.12691013514995575, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.11890150606632233, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.11574117839336395, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.10490718483924866, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.0593118816614151, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.056169819086790085, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.06747359037399292, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.06219714507460594, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.06015874445438385, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.05338408797979355, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.05084514617919922, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.034334540367126465, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.02975546196103096, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.02844851091504097, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.028132373467087746, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.017176451161503792, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.014698682352900505, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.014571011066436768, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.013509636744856834, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.013309506699442863, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.009031853638589382, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.009069125168025494, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.008605287410318851, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.006071107462048531, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.19.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.10784889757633209, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.1009570062160492, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.09786637127399445, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.08866927772760391, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.05029573291540146, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.04742734134197235, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.05768732726573944, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.05336020514369011, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.05101708322763443, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.045236457139253616, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.043039470911026, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.029235422611236572, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.025462647899985313, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.02408807910978794, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.0237579345703125, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.01461299229413271, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.012394845485687256, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.012251915410161018, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.011372452601790428, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.011163865216076374, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.007642503362149, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.007622472941875458, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.007216573227196932, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.004981216508895159, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.19.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.2407606989145279, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.22605089843273163, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.22106170654296875, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.20072945952415466, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.11309977620840073, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.10762696713209152, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.12690463662147522, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.11689421534538269, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.11464396864175797, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.10186626762151718, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.09676215052604675, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06451042741537094, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05581425875425339, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.054075293242931366, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.05366271734237671, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.032189980149269104, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.027522219344973564, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.02738192118704319, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.02520977519452572, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.02494032308459282, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.016584279015660286, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.01622907444834709, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.015919677913188934, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.01011338084936142, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.19.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.2129761129617691, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.18873004615306854, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.18085584044456482, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.15366199612617493, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.09919235110282898, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.09023316204547882, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.11488926410675049, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.10487744212150574, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.10202062129974365, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.08181387931108475, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.07479636371135712, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.059449825435876846, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05112970620393753, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.04856857657432556, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.04794888570904732, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.030157791450619698, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.026539409533143044, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.026366230100393295, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.023334281519055367, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.022942963987588882, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.017248542979359627, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.018334781751036644, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.0164862722158432, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.014409532770514488, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.19.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.1825879067182541, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.17174388468265533, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.16835615038871765, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.15325702726840973, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.08603394776582718, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.0821652039885521, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.09565537422895432, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.08837076276540756, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.08717235922813416, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.07767335325479507, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.07396390289068222, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.04878663644194603, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.042387619614601135, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.04133225604891777, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.04108808934688568, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.024405092000961304, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.021540403366088867, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.02147047407925129, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.019880466163158417, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.019731322303414345, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.013102086260914803, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.013418405316770077, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.012762113474309444, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.00940932147204876, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.19.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.24142636358737946, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.22719791531562805, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.22281736135482788, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.20282132923603058, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.1135651022195816, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.10853088647127151, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.12606839835643768, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.11652585864067078, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.11508217453956604, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.10258457809686661, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.09753015637397766, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.06407949328422546, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.05564882233738899, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.05431484431028366, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.05399831384420395, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.03199741989374161, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.027721401304006577, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.027636121958494186, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.025469407439231873, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.02526054158806801, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.016783395782113075, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.01639687828719616, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.016346314921975136, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010432285256683826, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.19.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1762188946759258, + "total_bits": 154030336.0, + "err": 0.23775078356266022, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.2123284637928009, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.20194841921329498, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7294596354166667, + "total_bits": 193188096.0, + "err": 0.1789894700050354, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.10898053646087646, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7294596354166667, + "total_bits": 263966976.0, + "err": 0.09885210543870926, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.12857502698898315, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.11850830912590027, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1762188946759258, + "total_bits": 224809216.0, + "err": 0.11249565333127975, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.525755931712963, + "total_bits": 249549056.0, + "err": 0.09348739683628082, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.08813084661960602, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.06546244770288467, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.056651193648576736, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.05236102640628815, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.051310956478118896, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.03267225995659828, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.027298465371131897, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3289966724537035, + "total_bits": 377180416.0, + "err": 0.026919586583971977, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525755931712963, + "total_bits": 391106816.0, + "err": 0.024255113676190376, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.731774450231481, + "total_bits": 405688576.0, + "err": 0.023552007973194122, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.01737501658499241, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.017508763819932938, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.01598495803773403, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.011937166564166546, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.20.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.11948957294225693, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.11199995130300522, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.10896104574203491, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.09886305779218674, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.05574537441134453, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.052786216139793396, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.06333879381418228, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.058510832488536835, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.05651785805821419, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.05019340664148331, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.04780779406428337, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.032139893621206284, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.027940403670072556, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.026688052341341972, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.0263869296759367, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.01608029380440712, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.013704062439501286, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.013582347892224789, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.012581435032188892, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.012395530939102173, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.008402963168919086, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.008344026282429695, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.00799418706446886, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005427255295217037, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.20.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.10171075165271759, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.09524600952863693, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.09229886531829834, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.08367656916379929, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.04736907035112381, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.044643744826316833, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.05437021702528, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.05028989166021347, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.048072509467601776, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.04265576973557472, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.04060713201761246, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.02755911834537983, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.024013100191950798, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.022695492953062057, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.02238408848643303, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.013781133107841015, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.011674824170768261, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.01153368130326271, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.01071801595389843, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.010518055409193039, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.007206278387457132, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.007179711014032364, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.00679260166361928, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.004686725325882435, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.20.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.23619362711906433, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.2217329740524292, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.2166804075241089, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.19683291018009186, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.1106383204460144, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.10526417195796967, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.12434738874435425, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.11462187767028809, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.11217912286520004, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.0997442752122879, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.09485655277967453, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06313640624284744, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.0547061525285244, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.052886299788951874, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.052444569766521454, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.03150585666298866, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.026922713965177536, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.026760458946228027, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.02466808445751667, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.02438473142683506, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.016260845586657524, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.015904169529676437, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.015581794083118439, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.009903850965201855, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.20.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.21153010427951813, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.18704825639724731, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.17853306233882904, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.1501818299293518, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.0983055904507637, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.08946507424116135, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.11486673355102539, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.10525979846715927, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.1015668660402298, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.0813712403178215, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.07305319607257843, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.059364888817071915, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05123402550816536, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.04809487238526344, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.04733816534280777, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.03010874055325985, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.02628917247056961, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.02607779949903488, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.023160148411989212, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.022684019058942795, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.017119746655225754, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.01823793351650238, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.01617102138698101, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.014257200062274933, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.20.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.17719338834285736, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.16674402356147766, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.1635080724954605, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.14882312715053558, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.08337472379207611, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.07964310795068741, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.09262700378894806, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.08560846000909805, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.08449461311101913, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.07533816248178482, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.07171109318733215, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.047202762216329575, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.041056882590055466, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.040044479072093964, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.03980201855301857, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.0236497949808836, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.02081880159676075, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.020747750997543335, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.019214658066630363, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.019065096974372864, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.012616671621799469, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.012879946269094944, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.012293516658246517, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.008947378024458885, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.20.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.236493781208992, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.22270776331424713, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.21843238174915314, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.198915496468544, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.11106964200735092, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.10621358454227448, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.12327580899000168, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.11395251750946045, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.11254915595054626, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.10043493658304214, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.09558577835559845, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.06254132837057114, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.05444325506687164, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.05314547196030617, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.05283917859196663, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.031268924474716187, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.027149958536028862, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.027059495449066162, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.024965351447463036, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.02476552687585354, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.016270553693175316, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.016076376661658287, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.015837430953979492, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010275891982018948, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.20.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1762188946759258, + "total_bits": 154030336.0, + "err": 0.2404518574476242, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.21560613811016083, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.20580941438674927, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7294596354166667, + "total_bits": 193188096.0, + "err": 0.1829867959022522, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.11035571992397308, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7294596354166667, + "total_bits": 263966976.0, + "err": 0.10065519064664841, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.12976811826229095, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.11916108429431915, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1762188946759258, + "total_bits": 224809216.0, + "err": 0.11377734690904617, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.525755931712963, + "total_bits": 249549056.0, + "err": 0.09525628387928009, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.08986055105924606, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.0660339966416359, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.057134419679641724, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.05317649990320206, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.05220998078584671, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.033135995268821716, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.027944520115852356, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3289966724537035, + "total_bits": 377180416.0, + "err": 0.027602557092905045, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525755931712963, + "total_bits": 391106816.0, + "err": 0.024993130937218666, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.731774450231481, + "total_bits": 405688576.0, + "err": 0.02435176819562912, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.017830535769462585, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.01813233271241188, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.016545476391911507, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.012765337713062763, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.21.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.11534664034843445, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.10805782675743103, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.10507526993751526, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.09530334174633026, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.053802989423274994, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.05092933401465416, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.061264894902706146, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.05649210885167122, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.054578859359025955, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.048445723950862885, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.04617999866604805, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.031164586544036865, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.026996178552508354, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.025784531608223915, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.02548881247639656, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.015587668865919113, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.013273083604872227, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.013149921782314777, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.012187092565000057, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.012002585455775261, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.008179137483239174, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.008123276755213737, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.0077819605357944965, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005340151954442263, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.21.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.09738519042730331, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.09116865694522858, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.08815526217222214, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.07985227555036545, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.04531256482005119, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.04262877628207207, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.05238378793001175, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.04840190336108208, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.04597680643200874, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.04077081009745598, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.038862474262714386, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.026550589129328728, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.023096628487110138, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.021717706695199013, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.021379107609391212, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.013274557888507843, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.011205513961613178, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.011054180562496185, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.010299102403223515, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.010085302405059338, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.006989846006035805, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.0069639841094613075, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.006559993140399456, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.004589042626321316, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.21.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.2403072714805603, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.22567977011203766, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.22077421844005585, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.20053374767303467, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.11269676685333252, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.10734447091817856, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.12645527720451355, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.11642605066299438, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.11427925527095795, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.10158532857894897, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.09659527987241745, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06426160037517548, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05558121204376221, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.05386771634221077, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.05345647409558296, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.032051462680101395, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.027376297861337662, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.027240697294473648, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.025075100362300873, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.024817604571580887, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.016506806015968323, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.016091838479042053, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.015852289274334908, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.009938031435012817, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.21.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.20109328627586365, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.17400673031806946, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.16529256105422974, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.1356644183397293, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.09276221692562103, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.08331338316202164, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.10797489434480667, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.09899035096168518, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.09599356353282928, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.07376403361558914, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.06583039462566376, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.05560726672410965, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.04837191104888916, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.045597828924655914, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.04491059109568596, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.028223920613527298, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.025192996487021446, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.0250190868973732, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.021766729652881622, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.021343139931559563, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.016129329800605774, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.017745915800333023, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.015301859937608242, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.014173140749335289, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.21.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.1808718889951706, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.17023080587387085, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.16693779826164246, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.15208354592323303, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.0851103737950325, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.08135292679071426, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.09451433271169662, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.08738149702548981, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.08626335859298706, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.07694877684116364, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.07322399318218231, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.04815226420760155, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.041836559772491455, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.040816329419612885, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.040577277541160583, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.02402493916451931, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.02112952247262001, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.021060120314359665, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.019490445032715797, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.019343052059412003, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.012727332301437855, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.01296627800911665, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.012395924888551235, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.008865873329341412, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.21.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.23396438360214233, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.22036953270435333, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.21618157625198364, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.19704565405845642, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.10997939109802246, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.1051807776093483, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.12190694361925125, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.11278484761714935, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.11140391230583191, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.09947387874126434, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.09462463855743408, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.06196771189570427, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.05380965769290924, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.05254058912396431, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.05223878473043442, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.030855795368552208, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.02674436755478382, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.026661382988095284, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.024576803669333458, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.02438654936850071, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.016004329547286034, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.015700580552220345, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.01557767391204834, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.009832119569182396, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.21.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1762188946759258, + "total_bits": 154030336.0, + "err": 0.23323769867420197, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.2078220397233963, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.19736312329769135, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7294596354166667, + "total_bits": 193188096.0, + "err": 0.17516639828681946, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.10677817463874817, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7294596354166667, + "total_bits": 263966976.0, + "err": 0.09671838581562042, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.12681983411312103, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.11643221229314804, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1762188946759258, + "total_bits": 224809216.0, + "err": 0.11028371751308441, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.525755931712963, + "total_bits": 249549056.0, + "err": 0.09159855544567108, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.08660276234149933, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.06462102383375168, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.05577927082777023, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.051441628485918045, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.05036511272192001, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.0324060283601284, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.027062665671110153, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3289966724537035, + "total_bits": 377180416.0, + "err": 0.02667335979640484, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525755931712963, + "total_bits": 391106816.0, + "err": 0.02411506325006485, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.731774450231481, + "total_bits": 405688576.0, + "err": 0.023416729643940926, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.01755206286907196, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.017686115577816963, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.016147198155522346, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.012448804453015327, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.22.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.1240629330277443, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.11658794432878494, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.1137196496129036, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.10337875783443451, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.0579984113574028, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.05508763715624809, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.06540485471487045, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.06056131795048714, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.05880903825163841, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.052364468574523926, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.04984118044376373, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.03318695351481438, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.028907708823680878, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.027748936787247658, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.027477476745843887, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.01659395359456539, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.014195488765835762, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.014084204100072384, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.013041693717241287, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.012867686338722706, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.008639835752546787, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.008526437915861607, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.008257175795733929, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005443368572741747, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.22.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.10871283710002899, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.10214229673147202, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.09937641024589539, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.09036010503768921, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.050787754356861115, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.048150017857551575, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.057779461145401, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.05335037410259247, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.05150633305311203, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.045877136290073395, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.04372899606823921, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.029299013316631317, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.02545260079205036, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.02430679462850094, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.02403314784169197, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.014626970514655113, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.012437986209988594, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.012320809066295624, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.011432169936597347, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.011259020306169987, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.007600011304020882, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.007499636150896549, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.007226808927953243, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.004782437812536955, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.22.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.24541108310222626, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.23105016350746155, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.22634418308734894, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.2060239017009735, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.11525362730026245, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.11001031845808029, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.12864333391189575, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.11870958656072617, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.11678976565599442, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.10415108501911163, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.09913624078035355, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06532877683639526, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05663786083459854, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.05507365241646767, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.054697971791028976, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.03258471190929413, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.02797485888004303, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.027854597195982933, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.025687672197818756, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.025437140837311745, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.016777046024799347, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.016376517713069916, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.01619098149240017, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.01011068094521761, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.22.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.21186426281929016, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.1817876100540161, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.1712588518857956, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.1363794207572937, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.09795055538415909, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.08663592487573624, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.11503398418426514, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.10574599355459213, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.10161492973566055, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.07534287124872208, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.06803018599748611, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.05930168554186821, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05119399353861809, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.04766469821333885, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.04681563749909401, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.02983180433511734, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.02561771869659424, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.025358665734529495, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.02136337198317051, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.02079067938029766, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.016489330679178238, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.01732693798840046, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.015385810285806656, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.013004542328417301, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.22.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.18210813403129578, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.17143414914608002, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.16820669174194336, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.1532326340675354, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.08563581109046936, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.08188868314027786, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.09491457790136337, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.08787962049245834, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.08678756654262543, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.07747390121221542, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.07371307164430618, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.04837045818567276, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.04209725186228752, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.04107324033975601, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.040839508175849915, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.024162761867046356, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.021247079595923424, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.021180149167776108, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.019607817754149437, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.019458482041954994, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.012724144384264946, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.012995671480894089, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.012388965114951134, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.008850475773215294, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.22.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.23719502985477448, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.22352059185504913, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.21932806074619293, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.1999642252922058, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.11139413714408875, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.10658462345600128, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.12353423237800598, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.11421824991703033, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.11284946650266647, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.10082432627677917, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.096054807305336, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.06278909742832184, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.054541777819395065, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.05326739698648453, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.052967898547649384, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.03140954300761223, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.027176765725016594, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.027094177901744843, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.024999499320983887, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.024807630106806755, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.01640525832772255, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.016039017587900162, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.015985244885087013, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010185835883021355, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.22.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1762188946759258, + "total_bits": 154030336.0, + "err": 0.2361368089914322, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.2119479775428772, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.2017265409231186, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7294596354166667, + "total_bits": 193188096.0, + "err": 0.17973026633262634, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.10833527892827988, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7294596354166667, + "total_bits": 263966976.0, + "err": 0.09853455424308777, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.12829071283340454, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.11801018565893173, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1762188946759258, + "total_bits": 224809216.0, + "err": 0.11172403395175934, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.525755931712963, + "total_bits": 249549056.0, + "err": 0.09376473724842072, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.08857300132513046, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.06515717506408691, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.056567735970020294, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.052187371999025345, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.05111178755760193, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.03262561932206154, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.02738443948328495, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3289966724537035, + "total_bits": 377180416.0, + "err": 0.026977531611919403, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525755931712963, + "total_bits": 391106816.0, + "err": 0.0245371051132679, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.731774450231481, + "total_bits": 405688576.0, + "err": 0.023824140429496765, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.01738988421857357, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.017799243330955505, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.015970705077052116, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.012404611334204674, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.23.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.11767543107271194, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.1104351282119751, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.10754658281803131, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.09770641475915909, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.05495921149849892, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.05210960656404495, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.06220871955156326, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.05756443366408348, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.05575832352042198, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.049567703157663345, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.047165852040052414, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.031575050204992294, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.027504023164510727, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.026302548125386238, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.026016024872660637, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.015782255679368973, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.013486096635460854, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.013370697386562824, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.012384595349431038, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.012206797488033772, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.008233238011598587, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.008161982521414757, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.007844379171729088, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005263285245746374, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.23.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.0999460443854332, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.09377589076757431, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.09105417877435684, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.08268026262521744, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.04667798429727554, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.044092126190662384, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.05345631763339043, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.049379825592041016, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.04732727259397507, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.04211081564426422, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.04014396294951439, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.0271065104752779, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.023578129708766937, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.022342175245285034, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.022048605605959892, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.01355278305709362, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.011457130312919617, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.011327789165079594, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.010531720705330372, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.010344168171286583, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.007065656594932079, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.006978429853916168, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.006668947171419859, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.004485332407057285, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.23.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.2436116635799408, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.22912585735321045, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.22425509989261627, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.20396484434604645, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.11431942880153656, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.10898785293102264, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.12777318060398102, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.11786001920700073, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.11585459113121033, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.10315021127462387, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.09815478324890137, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06486277282238007, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05623270571231842, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.05460822582244873, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.05421588942408562, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.03235772252082825, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.027744945138692856, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.027618251740932465, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.025439351797103882, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.025188906118273735, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.01664079912006855, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.016249923035502434, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.016027459874749184, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.0100344093516469, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.23.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.19895754754543304, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.1686173975467682, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.15884751081466675, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.13512785732746124, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.09136316925287247, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.07970231026411057, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.10663759708404541, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.098036989569664, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.09536085277795792, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.07221526652574539, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.0661846399307251, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.054896291345357895, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.04740244150161743, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.04436337947845459, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.04363660886883736, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.0276547372341156, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.023764682933688164, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.023593991994857788, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.02012411691248417, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.01964867115020752, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.015225294046103954, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.015936901792883873, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.014293171465396881, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.011895771138370037, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.23.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.18896107375621796, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.17798957228660583, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.1745680868625641, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.15911909937858582, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.08889008313417435, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.08500789105892181, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.0984659269452095, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.09117546677589417, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.09006771445274353, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.08042572438716888, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.07648373395204544, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.05015582963824272, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.04366374760866165, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.042622193694114685, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.0423838272690773, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.025053611025214195, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.022020868957042694, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.02194841578602791, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.020310429856181145, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.020152240991592407, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.013182129710912704, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.01342860795557499, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.012842469848692417, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.009092593565583229, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.23.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.2394653558731079, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.22565792500972748, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.22144369781017303, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.20187696814537048, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.112502820789814, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.10761486738920212, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.1245458796620369, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.11531770974397659, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.11395905911922455, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.1018054410815239, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.09684107452630997, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.06326059252023697, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.0550597608089447, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.0537816658616066, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.05347822606563568, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.03156224265694618, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.027357997372746468, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.02727430872619152, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.02514920011162758, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.024955784901976585, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.016314778476953506, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.0160257276147604, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.01589024066925049, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.01000615581870079, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.23.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1762188946759258, + "total_bits": 154030336.0, + "err": 0.23912540078163147, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.2149013727903366, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.20485249161720276, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7294596354166667, + "total_bits": 193188096.0, + "err": 0.18258784711360931, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.10980267077684402, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7294596354166667, + "total_bits": 263966976.0, + "err": 0.1001085489988327, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.12953491508960724, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.11921890825033188, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1762188946759258, + "total_bits": 224809216.0, + "err": 0.11315827071666718, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.525755931712963, + "total_bits": 249549056.0, + "err": 0.09514393657445908, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.08984146267175674, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.06584495306015015, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.057107217609882355, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.0528612919151783, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.05181177705526352, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.03318916633725166, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.02767873741686344, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3289966724537035, + "total_bits": 377180416.0, + "err": 0.027286747470498085, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525755931712963, + "total_bits": 391106816.0, + "err": 0.024798771366477013, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.731774450231481, + "total_bits": 405688576.0, + "err": 0.024105200543999672, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.01807006075978279, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.017875930294394493, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.0167623832821846, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.01237862091511488, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.24.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.11982738971710205, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.11251585930585861, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.1095767468214035, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.09952931851148605, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.05601023882627487, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.053073178976774216, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.06372707337141037, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.05874386429786682, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.056781917810440063, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.05055378004908562, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.04821869358420372, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.032353829592466354, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.028071796521544456, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.026827532798051834, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.026529815047979355, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.016213055700063705, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.013772493228316307, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.013652057386934757, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.012662755325436592, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.012472144328057766, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.008496623486280441, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.00836494192481041, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.00808791071176529, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005420043133199215, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.24.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.10371651500463486, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.09741143137216568, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.09471144527196884, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.08605634421110153, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.048498887568712234, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.04591092839837074, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.05530678480863571, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.05112172290682793, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.049188051372766495, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.04377955570816994, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.04172324389219284, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.028040491044521332, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.024400677531957626, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.023219024762511253, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.022931713610887527, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.014022421091794968, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.011882791295647621, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.011762643232941628, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.010918867774307728, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.01074032112956047, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.007286000065505505, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.00719106663018465, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.006905464921146631, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.0045842560939490795, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.24.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.2420882284641266, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.22770318388938904, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.2229962944984436, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.2029268890619278, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.1136690303683281, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.1084519773721695, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.12705999612808228, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.1172032430768013, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.11522100865840912, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.10266634076833725, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.09769171476364136, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06453581154346466, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05594588816165924, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.054322533309459686, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.05393598973751068, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.03219633921980858, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.027591129764914513, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.027464913204312325, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.025309529155492783, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.025057343766093254, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.01656733825802803, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.01615477353334427, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.015956418588757515, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.009947292506694794, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.24.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.18711698055267334, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.1599006950855255, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.1520562767982483, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.12321678549051285, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.0847715511918068, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.07544443756341934, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.09920552372932434, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.08969686925411224, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.08766104280948639, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.06663621962070465, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.060906440019607544, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.05066452920436859, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.04426678270101547, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.04206513985991478, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.04153159633278847, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.02642541565001011, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.023801764473319054, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.02368137426674366, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.020613620057702065, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.0202809888869524, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.015887867659330368, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.017261086031794548, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.015276435762643814, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.014284837059676647, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.24.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.19685406982898712, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.1854027509689331, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.18185001611709595, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.16576693952083588, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.09267269819974899, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.08860687166452408, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.1027434915304184, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.09505553543567657, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.0938725620508194, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.08384545892477036, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.07974720746278763, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.05234687402844429, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.04550408199429512, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.044412072747945786, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.04416114464402199, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.026168549433350563, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.022898610681295395, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.022824600338935852, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.021102556958794594, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.020943744108080864, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.013787412084639072, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.01388629525899887, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.01343846321105957, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.009311787784099579, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.24.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.2438918501138687, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.22984755039215088, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.2255357950925827, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.20558848977088928, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.11464832723140717, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.10967819392681122, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.12706895172595978, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.11752509325742722, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.11614277958869934, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.10374066978693008, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.09883730858564377, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.06476327031850815, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.05616854503750801, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.05485428869724274, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.05454956367611885, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.0323842316865921, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.02801915444433689, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.02793291211128235, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.025778062641620636, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.02558326907455921, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.01698247529566288, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.016580060124397278, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.016548864543437958, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010592442937195301, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.24.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1762188946759258, + "total_bits": 154030336.0, + "err": 0.23912313580513, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.2153434008359909, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.2054818719625473, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7294596354166667, + "total_bits": 193188096.0, + "err": 0.18345046043395996, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.10980794578790665, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7294596354166667, + "total_bits": 263966976.0, + "err": 0.10029774904251099, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.12903526425361633, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.1191321611404419, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1762188946759258, + "total_bits": 224809216.0, + "err": 0.11309625208377838, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.525755931712963, + "total_bits": 249549056.0, + "err": 0.09539549052715302, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.09012597799301147, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.06558310240507126, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.05693212151527405, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.05271365866065025, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.05168463662266731, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.032829903066158295, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.027327172458171844, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3289966724537035, + "total_bits": 377180416.0, + "err": 0.026945412158966064, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525755931712963, + "total_bits": 391106816.0, + "err": 0.024454565718770027, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.731774450231481, + "total_bits": 405688576.0, + "err": 0.023767173290252686, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.017384789884090424, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.01729150302708149, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.016024738550186157, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.011537051759660244, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.25.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.12324557453393936, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.11571723967790604, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.11273452639579773, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.10241792351007462, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.05765464901924133, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.0546695850789547, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.06527623534202576, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.060354456305503845, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.05845802649855614, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.05200796574354172, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.04953179880976677, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.033135171979665756, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.02882370352745056, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.027600347995758057, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.02731124684214592, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.01656256429851055, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.014147991314530373, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.014028445817530155, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.012994893826544285, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.01280800811946392, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.008637168444693089, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.008551284670829773, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.008236007764935493, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005510942079126835, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.25.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.10589288920164108, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.09947481751441956, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.09668231010437012, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.08787539601325989, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.049568235874176025, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.046885836869478226, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.056579023599624634, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.05224401131272316, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.05024821311235428, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.044726744294166565, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.04268158972263336, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.028720872476696968, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.024962134659290314, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.02373250015079975, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.02344346046447754, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.014352233149111271, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.01214424055069685, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.01201748475432396, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.011154238134622574, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.010972234420478344, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.007458103820681572, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.00734325684607029, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.007061943877488375, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.004678981378674507, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.25.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.2481296956539154, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.2333640456199646, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.2285558432340622, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.20798912644386292, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.11659298092126846, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.11118631809949875, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.13007041811943054, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.12010414898395538, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.11816281080245972, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.10524718463420868, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.10013725608587265, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06613617390394211, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05730576813220978, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.05570676922798157, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.05531468614935875, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.03297749161720276, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.028279123827815056, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.02815217711031437, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.025924688205122948, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.02567640319466591, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.016948198899626732, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.016530124470591545, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.016335749998688698, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.010155771858990192, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.25.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.20734301209449768, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.18101251125335693, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.17046630382537842, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.14372429251670837, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.09635436534881592, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.0855507180094719, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.11453288793563843, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.10521739721298218, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.09910440444946289, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.07701539993286133, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.07165306806564331, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.05893588811159134, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05071458965539932, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.04667726531624794, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.04568853974342346, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.029644865542650223, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.02474072203040123, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.02434578537940979, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.020993700250983238, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.020338647067546844, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.01623307354748249, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.01641465723514557, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.01497636828571558, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.011826002039015293, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.25.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.2014743983745575, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.18984445929527283, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.18621402978897095, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.16970407962799072, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.09489491581916809, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.09073042124509811, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.1052480936050415, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.0973258763551712, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.09612831473350525, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.08579526841640472, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.0817541554570198, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.0535830557346344, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.046582404524087906, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.04548027738928795, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.045215506106615067, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.026807161048054695, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.023465273901820183, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.02338905818760395, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.021632341668009758, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.02146441861987114, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.014135986566543579, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.014251518994569778, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.013781171292066574, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.00958641991019249, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.25.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.24548566341400146, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.23124513030052185, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.22695355117321014, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.20683883130550385, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.11544622480869293, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.11041678488254547, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.12799622118473053, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.11835052818059921, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.11696060001850128, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.104429230093956, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.09942149370908737, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.06512174755334854, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.05655517801642418, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.05523370951414108, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.054917387664318085, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.03254197910428047, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.02819569781422615, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.02810969203710556, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.025927118957042694, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.025730671361088753, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.017027759924530983, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.016665920615196228, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.01659819670021534, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.01061420887708664, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.25.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1762188946759258, + "total_bits": 154030336.0, + "err": 0.24052143096923828, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.21690893173217773, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.20699980854988098, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7294596354166667, + "total_bits": 193188096.0, + "err": 0.18461325764656067, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.11047360301017761, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7294596354166667, + "total_bits": 263966976.0, + "err": 0.10100545734167099, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.13003234565258026, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.11984232068061829, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1762188946759258, + "total_bits": 224809216.0, + "err": 0.11372515559196472, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.525755931712963, + "total_bits": 249549056.0, + "err": 0.09598476439714432, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.09061331301927567, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.065961554646492, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.05718844756484032, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.05298266559839249, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.05195818468928337, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.0329742394387722, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.027353661134839058, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3289966724537035, + "total_bits": 377180416.0, + "err": 0.02695799618959427, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525755931712963, + "total_bits": 391106816.0, + "err": 0.02444165199995041, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.731774450231481, + "total_bits": 405688576.0, + "err": 0.023756949231028557, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.017383040860295296, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.01714164949953556, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.016014505177736282, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.01123124174773693, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.26.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.12284711003303528, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.11538252979516983, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.11232610046863556, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.10211493074893951, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.05755472183227539, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.05453721806406975, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.06543193757534027, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.060310669243335724, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.05835813283920288, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.05190569907426834, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.04954579472541809, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.033216770738363266, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.028837133198976517, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.0275574903935194, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.027251705527305603, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.01661253534257412, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.01410814467817545, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.01398069690912962, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.012953609228134155, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.012759407982230186, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.008643771521747112, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.008508559316396713, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.008231147192418575, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005447302013635635, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.26.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.10781458020210266, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.10128741711378098, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.09844818711280823, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.0895320475101471, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.05043365806341171, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.0476938933134079, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.057821791619062424, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.053171366453170776, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.051136888563632965, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.045515887439250946, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.043541938066482544, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.02930309623479843, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.02538519725203514, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.024154774844646454, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.02385995164513588, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.014654316939413548, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.012369408272206783, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.01224222406744957, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.011362052522599697, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.01117522083222866, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.007644301280379295, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.0074797822162508965, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.007252011448144913, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.004765240475535393, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.26.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.24887236952781677, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.23407642543315887, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.22922441363334656, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.20868368446826935, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.11706849187612534, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.11163707077503204, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.13072869181632996, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.120621457695961, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.11867095530033112, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.10566829890012741, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.10056403279304504, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06652215868234634, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.0575871467590332, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.0559784360229969, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.055598169565200806, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.033183954656124115, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.02852031961083412, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.028403380885720253, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.02616271749138832, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.02591642737388611, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.017135106027126312, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.01682128757238388, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.016520613804459572, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.010562277399003506, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.26.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.20912867784500122, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.17412592470645905, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.1627204716205597, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.1379987895488739, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.09599597007036209, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.08309806883335114, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.11309947073459625, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.10316865891218185, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.10005888342857361, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.07400834560394287, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.0675860121846199, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.058174289762973785, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.050280552357435226, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.047031644731760025, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.04625017195940018, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.029455125331878662, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.025833575055003166, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.025649091228842735, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.02182338573038578, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.021308090537786484, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.016683973371982574, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.01806741952896118, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.015709349885582924, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.01420536544173956, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.26.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.20352743566036224, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.1916203796863556, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.18795450031757355, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.17132428288459778, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.09584809839725494, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.09161219745874405, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.10622583329677582, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.09831716120243073, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.09713178873062134, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.08665838837623596, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.08244694024324417, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.05415064096450806, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.047064196318387985, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.04593108966946602, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.04567151516675949, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.027049072086811066, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.023673849180340767, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.023597631603479385, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.021805359050631523, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.02164146490395069, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.014237122610211372, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.014336134307086468, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.013867880217730999, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.00959180761128664, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.26.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.24610137939453125, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.23186123371124268, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.22745536267757416, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.20736908912658691, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.11578545719385147, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.11068546026945114, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.12831316888332367, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.11873270571231842, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.11728788167238235, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.1046837866306305, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.09962763637304306, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.06520022451877594, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.05669224262237549, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.05536267161369324, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.0550425760447979, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.03256740793585777, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.028193851932883263, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.028102830052375793, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.025898292660713196, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.025697214528918266, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.016918564215302467, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.016555428504943848, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.016477691009640694, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010394281707704067, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.26.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1762188946759258, + "total_bits": 154030336.0, + "err": 0.24104617536067963, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.21795670688152313, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.20813313126564026, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7294596354166667, + "total_bits": 193188096.0, + "err": 0.1857229322195053, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.11100155860185623, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7294596354166667, + "total_bits": 263966976.0, + "err": 0.10164126008749008, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.13054537773132324, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.1204339787364006, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1762188946759258, + "total_bits": 224809216.0, + "err": 0.11417973041534424, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.525755931712963, + "total_bits": 249549056.0, + "err": 0.09669917821884155, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.09133686870336533, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.06648228317499161, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.057691264897584915, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.05341546982526779, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.05235926806926727, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.03345801308751106, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.027875451371073723, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3289966724537035, + "total_bits": 377180416.0, + "err": 0.027468807995319366, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525755931712963, + "total_bits": 391106816.0, + "err": 0.02504926733672619, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.731774450231481, + "total_bits": 405688576.0, + "err": 0.02436027117073536, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.017890699207782745, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.017874425277113914, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.01649586483836174, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.012233883142471313, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.27.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.132928729057312, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.12476401776075363, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.12171357870101929, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.1106129065155983, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.062296342104673386, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.05913177505135536, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.07022962719202042, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.06499084085226059, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.06318476051092148, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.056144025176763535, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.05337060987949371, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.03559989109635353, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.03103315457701683, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.02981373481452465, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.029515312984585762, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.017791133373975754, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.015210467390716076, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.015097161754965782, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.013947606086730957, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.013759267516434193, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.009211310185492039, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.009085064753890038, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.008807200007140636, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005730841308832169, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.27.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.11509881168603897, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.10807599127292633, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.1052352711558342, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.09569548815488815, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.05397506058216095, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.05118096247315407, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.06133411079645157, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.056545425206422806, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.05473420023918152, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.04866965860128403, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.04639836773276329, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.03113011084496975, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.02699805051088333, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.025851447135210037, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.025575071573257446, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.015565779060125351, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.013209161348640919, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.013097047805786133, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.012117584235966206, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.01194202620536089, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.008092202246189117, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.007930949330329895, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.00771530345082283, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005030702333897352, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.27.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.2521115243434906, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.2370375394821167, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.23219621181488037, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.2113272249698639, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.11856235563755035, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.11306977272033691, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.1322038471698761, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.1220199465751648, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.1201929897069931, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.10695485770702362, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.10172998160123825, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06722302734851837, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.0582425519824028, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.05665993690490723, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.05628422275185585, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.03351815789937973, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.02878250740468502, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.028672652319073677, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.02638528123497963, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.026141999289393425, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.017259016633033752, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.01684609241783619, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.01666291058063507, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.010394822806119919, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.27.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.20848755538463593, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.18220114707946777, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.1727655827999115, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.14680223166942596, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.096031554043293, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.085659459233284, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.11238997429609299, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.10374521464109421, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.09942509979009628, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.07857508957386017, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.07164344191551208, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.057698220014572144, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.04983149468898773, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.046251118183135986, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.04538793861865997, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.028950823470950127, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.02417089231312275, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.023880932480096817, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.020723238587379456, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.020138945430517197, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.01551854144781828, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.01552538014948368, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.014338497072458267, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.010700252838432789, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.27.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.20849592983722687, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.1962275505065918, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.19244444370269775, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.17528831958770752, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.09824357181787491, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.09388354420661926, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.10900480300188065, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.10081211477518082, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.09958187490701675, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.08875804394483566, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.08439836651086807, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.055542998015880585, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.04825812205672264, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.04709433391690254, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.04682404547929764, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.027773337438702583, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.024251243099570274, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.024174297228455544, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.02231578156352043, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.022144274786114693, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.014633099548518658, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.01465647853910923, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.01425087545067072, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.009771650657057762, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.27.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.2490708827972412, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.23444977402687073, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.23000143468379974, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.20958200097084045, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.11723101139068604, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.11203394830226898, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.12983490526676178, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.1201973706483841, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.11878014355897903, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.1059054508805275, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.10072098672389984, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.06603661179542542, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.05741394683718681, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.05605679377913475, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.05572989955544472, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.03296376392245293, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.028511764481663704, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.02841700240969658, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.026174107566475868, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.025965331122279167, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.017099034041166306, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.016707008704543114, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.01664820685982704, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010423863306641579, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.27.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1762188946759258, + "total_bits": 154030336.0, + "err": 0.24668650329113007, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.2235163301229477, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.2139778435230255, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7294596354166667, + "total_bits": 193188096.0, + "err": 0.1908249855041504, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.11364375054836273, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7294596354166667, + "total_bits": 263966976.0, + "err": 0.1044502705335617, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.1328217089176178, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.12259896099567413, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1762188946759258, + "total_bits": 224809216.0, + "err": 0.11675646156072617, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.525755931712963, + "total_bits": 249549056.0, + "err": 0.09905844926834106, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.09333674609661102, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.06734739989042282, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.05849946662783623, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.054453782737255096, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.05346381291747093, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.0336998850107193, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.028005782514810562, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3289966724537035, + "total_bits": 377180416.0, + "err": 0.027630943804979324, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525755931712963, + "total_bits": 391106816.0, + "err": 0.025061985477805138, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.731774450231481, + "total_bits": 405688576.0, + "err": 0.024399612098932266, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.01770530641078949, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.017367945984005928, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.01638142019510269, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.011205466464161873, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.28.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.13273204863071442, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.12453585118055344, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.12125485390424728, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.11013557761907578, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.062236487865448, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.058972135186195374, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.07046366482973099, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.06516983360052109, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.06312932819128036, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.056052446365356445, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.05327020213007927, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.03581608086824417, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.03117125853896141, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.02982798032462597, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.029504254460334778, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.01791290007531643, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.015298651531338692, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.015165558084845543, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.014025037176907063, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.01382711622864008, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.009355799295008183, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.00926161091774702, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.008916037157177925, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005979261826723814, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.28.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.11486121267080307, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.1077180728316307, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.10473250597715378, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.0951508954167366, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.05385003611445427, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.05093665421009064, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.06133287400007248, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.056585200130939484, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.05462168902158737, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.04845626279711723, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.046192947775125504, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.03113231249153614, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.027021627873182297, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.025785788893699646, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.02548879012465477, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.01556492131203413, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.013187212869524956, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.013066483661532402, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.01208416000008583, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.011898023076355457, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.008090551942586899, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.007948826067149639, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.007688571698963642, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.0050528384745121, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.28.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.2518678307533264, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.23661166429519653, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.23170016705989838, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.21063584089279175, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.11840119957923889, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.11287453770637512, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.1322871446609497, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.12205415964126587, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.12009064108133316, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.10671866685152054, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.10144868493080139, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.0671636089682579, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05826537311077118, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.05659510940313339, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.05619816109538078, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.03351213037967682, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.028717007488012314, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.028587481006979942, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.026280520483851433, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.026025181636214256, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.017211709171533585, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.016765743494033813, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.016585953533649445, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.010266670025885105, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.28.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.21581777930259705, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.1718047708272934, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.15842215716838837, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.12608712911605835, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.09333579242229462, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.07764710485935211, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.11671547591686249, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.10329767316579819, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.09930906444787979, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.07088030874729156, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.06518365442752838, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.057853490114212036, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.051587142050266266, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.04711677134037018, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.0459514744579792, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.03014007769525051, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.027830202132463455, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.027585912495851517, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.02411547675728798, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.023447100073099136, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.018169129267334938, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.02145369164645672, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.016865897923707962, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.01830459199845791, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.28.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.21346192061901093, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.20088081061840057, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.1969432532787323, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.17933477461338043, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.10070939362049103, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.09618101269006729, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.111718088388443, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.10334525257349014, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.10205955803394318, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.0909128412604332, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.08648233860731125, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.05697101727128029, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.04953622817993164, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.048324912786483765, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.04804472625255585, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.028545793145895004, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.02496878243982792, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.02488977648317814, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.022987335920333862, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.022808389738202095, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.015167458914220333, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.015210798010230064, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.014777109026908875, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010294250212609768, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.28.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.2529408931732178, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.23806846141815186, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.2334827482700348, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.21262921392917633, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.11910192668437958, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.1138007640838623, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.13227863609790802, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.12220081686973572, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.1207243800163269, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.10756082087755203, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.10232236981391907, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.0672467052936554, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.05843697488307953, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.05703551694750786, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.056700367480516434, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.033686645328998566, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.029150232672691345, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.029058923944830894, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.02677091769874096, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.026561396196484566, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.017755519598722458, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.017285479232668877, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.017301175743341446, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.011077053844928741, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.28.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1762188946759258, + "total_bits": 154030336.0, + "err": 0.2458762228488922, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.22332613170146942, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.21360638737678528, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7294596354166667, + "total_bits": 193188096.0, + "err": 0.19051212072372437, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.11350597441196442, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7294596354166667, + "total_bits": 263966976.0, + "err": 0.10430441051721573, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.13305802643299103, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.1228988915681839, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1762188946759258, + "total_bits": 224809216.0, + "err": 0.11653288453817368, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.525755931712963, + "total_bits": 249549056.0, + "err": 0.09917664527893066, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.09357906877994537, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.06755901128053665, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.05881914123892784, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.05455329641699791, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.053519513458013535, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.033829379826784134, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.028352493420243263, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3289966724537035, + "total_bits": 377180416.0, + "err": 0.027948064729571342, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525755931712963, + "total_bits": 391106816.0, + "err": 0.025495944544672966, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.731774450231481, + "total_bits": 405688576.0, + "err": 0.024813316762447357, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.01777372695505619, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.018002359196543694, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.016366388648748398, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.012146721594035625, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.29.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.13771896064281464, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.1292724460363388, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.12611570954322815, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.11459066718816757, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.06469005346298218, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.06137974560260773, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.07283028960227966, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.06738810241222382, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.06560105830430984, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.058238156139850616, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.055308997631073, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.03695986792445183, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.032199837267398834, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.030950874090194702, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.0306545440107584, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.018452415242791176, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.015777552500367165, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.01566178724169731, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.014446832239627838, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.014257268980145454, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.009533567354083061, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.009390327148139477, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.009119962342083454, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.0058858348056674, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.29.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.12082985788583755, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.113387331366539, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.11050619184970856, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.10039875656366348, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.056732382625341415, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.053773947060108185, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.06429757177829742, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.05932445079088211, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.05753035470843315, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.05108760669827461, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.048667315393686295, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.03263276442885399, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.028327174484729767, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.027160847559571266, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.02687912806868553, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.016308248043060303, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.013860034756362438, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.013751204125583172, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.012702921405434608, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.012521074153482914, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.008453860878944397, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.00828617438673973, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.008065865375101566, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.0052163926884531975, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.29.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.258271723985672, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.2427128553390503, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.2377208173274994, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.21617554128170013, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.12156031280755997, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.11587467789649963, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.13548146188259125, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.12515661120414734, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.12327362596988678, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.10954847931861877, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.10407844185829163, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06885440647602081, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.059746965765953064, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.058109745383262634, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.05773129686713219, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.034340787678956985, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.02948370948433876, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.029353994876146317, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.02696964330971241, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.026727698743343353, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.017637409269809723, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.01717829518020153, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.01702987402677536, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.010476958937942982, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.29.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.2193266749382019, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.1756407618522644, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.16085094213485718, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.14527389407157898, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.09968628734350204, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.08204120397567749, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.11863679438829422, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.10897860676050186, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.10502314567565918, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.07674220949411392, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.07355996966362, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06099153310060501, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.052500080317258835, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.04823549836874008, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.04719087854027748, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.030663950368762016, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.025451024994254112, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.025195294991135597, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.021001029759645462, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.02026059478521347, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.016725588589906693, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.016765093430876732, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.015397538430988789, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.011875721625983715, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.29.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.21757446229457855, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.20470309257507324, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.20071746408939362, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.18267816305160522, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.10268046706914902, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.09803152084350586, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.11400721222162247, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.1054152324795723, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.10405972599983215, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.09264251589775085, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.0881030410528183, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.05815005674958229, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.050474025309085846, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.049234531819820404, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.04894232749938965, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.029106300324201584, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.025391442701220512, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.025304781273007393, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.023358898237347603, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.02317417971789837, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.015406928956508636, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.015408702194690704, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.01500505767762661, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010341910645365715, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.29.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.25580382347106934, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.24058333039283752, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.23591823875904083, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.21470578014850616, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.12045516073703766, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.11504611372947693, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.1335383802652359, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.12360182404518127, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.12209858745336533, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.10869935899972916, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.10323216766119003, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.06791404634714127, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.05907076224684715, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.05763471871614456, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.0572902150452137, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.033914368599653244, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.02933177538216114, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.02923436649143696, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.026895925402641296, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.026677260175347328, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.017594577744603157, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.01721617951989174, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.01712212711572647, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010767722502350807, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.29.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1762188946759258, + "total_bits": 154030336.0, + "err": 0.2513626217842102, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.22821572422981262, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.2184552699327469, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7294596354166667, + "total_bits": 193188096.0, + "err": 0.19452612102031708, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.11599117517471313, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7294596354166667, + "total_bits": 263966976.0, + "err": 0.10669344663619995, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.13627566397190094, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.12539640069007874, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1762188946759258, + "total_bits": 224809216.0, + "err": 0.11915194243192673, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.525755931712963, + "total_bits": 249549056.0, + "err": 0.10126390308141708, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.09549792855978012, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.06932157278060913, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.05995047464966774, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.05569252371788025, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.05466294288635254, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.03494298830628395, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.02882523275911808, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3289966724537035, + "total_bits": 377180416.0, + "err": 0.028424421325325966, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525755931712963, + "total_bits": 391106816.0, + "err": 0.025870176032185555, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.731774450231481, + "total_bits": 405688576.0, + "err": 0.025179626420140266, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.018807262182235718, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.018129494041204453, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.017418939620256424, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.012017213739454746, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.30.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.1435406655073166, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.13466349244117737, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.13133473694324493, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.11928947269916534, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.06742886453866959, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.06396865844726562, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.07607991248369217, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.07028496265411377, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.068416528403759, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.0606900155544281, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.05765891075134277, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.038665540516376495, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.03357844427227974, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.03227793425321579, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.03197391703724861, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.019330667331814766, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.016491111367940903, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.01637481525540352, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.015103241428732872, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.014903336763381958, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.010066845454275608, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.009859037585556507, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.009627842344343662, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.006238679401576519, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.30.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.12486571073532104, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.11718656122684479, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.11416196823120117, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.10371480882167816, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.05869261920452118, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.05561112239956856, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.06655724346637726, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.061369579285383224, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.05954475700855255, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.05282501131296158, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.050316207110881805, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.03384054824709892, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.029324810951948166, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.02811230905354023, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.02781839109957218, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.016921505331993103, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.01436435617506504, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.014248212799429893, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.01315830647945404, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.012973182834684849, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.008800733834505081, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.008613025769591331, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.008394896052777767, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005459108389914036, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.30.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.2598126530647278, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.243971049785614, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.238993301987648, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.21715997159481049, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.12232062965631485, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.11658687889575958, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.13633498549461365, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.12592002749443054, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.12405414879322052, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.11014192551374435, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.10462217777967453, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06929140537977219, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.06010963022708893, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.05847308412194252, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.05808013305068016, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.034552864730358124, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.029690027236938477, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.029569940641522408, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.02715139277279377, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.026895461603999138, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.01777784712612629, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.017340384423732758, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.017172671854496002, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.01066905539482832, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.30.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.22738789021968842, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.1972694993019104, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.18653206527233124, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.15327322483062744, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.10367854684591293, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.09317345917224884, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.12322625517845154, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.11331174522638321, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.10881482064723969, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.08370672166347504, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.07590785622596741, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06325796991586685, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05457652360200882, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.050167106091976166, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.0490824319422245, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.03169528394937515, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.026524202898144722, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.026234077289700508, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.0226533655077219, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.021934418007731438, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.017151348292827606, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.017524704337120056, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.01575123891234398, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.012505220249295235, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.30.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.21548977494239807, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.20260301232337952, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.1985606998205185, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.1805335283279419, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.10166957974433899, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.0970291942358017, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.11290064454078674, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.10440956801176071, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.10309261828660965, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.09164587408304214, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.08709041029214859, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.05762890726327896, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.05002376064658165, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.048782460391521454, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.04849208891391754, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.028807682916522026, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.02514643408358097, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.025063861161470413, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.02310781739652157, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.022922568023204803, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.015203932300209999, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.015247534029185772, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.014796088449656963, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010204232297837734, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.30.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.2574049234390259, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.24201001226902008, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.23725587129592896, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.21576347947120667, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.12133899331092834, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.1158151775598526, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.13466773927211761, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.12458965927362442, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.12302535772323608, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.10935890674591064, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.10393189638853073, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.06868230551481247, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.0595763698220253, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.05810174718499184, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.057759806513786316, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.034304507076740265, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.029703037813305855, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.02960195578634739, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.02723655290901661, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.027013584971427917, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.01796303316950798, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.01762106828391552, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.017482079565525055, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.011293383315205574, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.30.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1762188946759258, + "total_bits": 154030336.0, + "err": 0.2563475966453552, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.23276296257972717, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.22275590896606445, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7294596354166667, + "total_bits": 193188096.0, + "err": 0.19864974915981293, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.11842291057109833, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7294596354166667, + "total_bits": 263966976.0, + "err": 0.10888207703828812, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.13827727735042572, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.12801338732242584, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1762188946759258, + "total_bits": 224809216.0, + "err": 0.12149323523044586, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.525755931712963, + "total_bits": 249549056.0, + "err": 0.10338623076677322, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.09735056757926941, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.07028317451477051, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.061219897121191025, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.056890375912189484, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.05583684891462326, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.03519764170050621, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.0294977817684412, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3289966724537035, + "total_bits": 377180416.0, + "err": 0.029082583263516426, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525755931712963, + "total_bits": 391106816.0, + "err": 0.02649792842566967, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.731774450231481, + "total_bits": 405688576.0, + "err": 0.025797098875045776, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.0185680091381073, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.018625300377607346, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.01719110831618309, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.012447591871023178, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.31.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.1397954821586609, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.1311318427324295, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.12775273621082306, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.11592037975788116, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.06568685919046402, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.06226683780550957, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.0740787535905838, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.0686163380742073, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.0666445717215538, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.05905083939433098, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.05603233724832535, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.03758474066853523, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.032761100679636, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.031459465622901917, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.031139789149165154, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.018785148859024048, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.016063477843999863, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.01594221591949463, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.014697052538394928, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.014498092234134674, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.00974509958177805, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.00960183423012495, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.00931343249976635, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.006064957939088345, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.31.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.12192640453577042, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.11431078612804413, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.11133220791816711, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.100958451628685, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.05728559195995331, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.054250046610832214, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.06495102494955063, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.0599750354886055, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.058119479566812515, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.05150359496474266, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.04896000772714615, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.0329955592751503, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.028657982125878334, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.027440687641501427, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.027149997651576996, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.01649082452058792, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.01403453666716814, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.013916225172579288, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.012846970930695534, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.01266056764870882, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.008568717166781425, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.008436017669737339, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.00817080121487379, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005370006430894136, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.31.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.2603345513343811, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.24432168900966644, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.2393188625574112, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.21725833415985107, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.12260803580284119, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.11681670695543289, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.13666538894176483, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.12620118260383606, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.12433983385562897, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.11030476540327072, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.10467088222503662, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06942017376422882, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.06025642156600952, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.05859442055225372, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.058207783848047256, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.034631162881851196, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.029731029644608498, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.029615962877869606, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.027173347771167755, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.026918942108750343, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.017781740054488182, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.01734335348010063, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.017173731699585915, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.010616064071655273, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.31.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.2035459578037262, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.16554293036460876, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.1488579511642456, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.12845085561275482, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.09337904304265976, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.07569509744644165, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.11640153080224991, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.10659810900688171, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.09736555814743042, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.07102377712726593, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.06700567156076431, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.05988193303346634, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05123984441161156, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.04517094045877457, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.04364832863211632, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.02993372082710266, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.02380690537393093, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.023185648024082184, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.01947821117937565, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.018405569717288017, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.01592550054192543, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.01599561609327793, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.013881976716220379, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.011041996069252491, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.31.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.21450941264629364, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.20142507553100586, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.19740226864814758, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.17952172458171844, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.10125897824764252, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.09657347947359085, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.11248279362916946, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.10402090847492218, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.10267547518014908, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.09118103235960007, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.08656686544418335, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.05738542973995209, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.049850672483444214, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.0486019104719162, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.04830112308263779, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.028737954795360565, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.0250999853014946, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.025013411417603493, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.023056108504533768, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.022870086133480072, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.015250923112034798, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.015284402295947075, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.014844514429569244, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010320899076759815, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.31.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.25933390855789185, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.24374255537986755, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.2388661652803421, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.21725767850875854, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.12227614969015121, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.11665000766515732, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.13562063872814178, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.12555718421936035, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.12400839477777481, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.11011990159749985, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.10457190126180649, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.06904345005750656, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.060002401471138, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.058517709374427795, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.058162663131952286, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.03446393460035324, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.02982446551322937, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.029722632840275764, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.02730703353881836, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.02708086185157299, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.017893770709633827, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.017553584650158882, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.017401577904820442, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.011052297428250313, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.31.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1762188946759258, + "total_bits": 154030336.0, + "err": 0.26375263929367065, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.23979929089546204, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.23004868626594543, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7294596354166667, + "total_bits": 193188096.0, + "err": 0.20498116314411163, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.12199123203754425, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7294596354166667, + "total_bits": 263966976.0, + "err": 0.11247841268777847, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.14284788072109222, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.13117876648902893, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1762188946759258, + "total_bits": 224809216.0, + "err": 0.12517769634723663, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.525755931712963, + "total_bits": 249549056.0, + "err": 0.10656248033046722, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.10033369064331055, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.07229632139205933, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.06274872273206711, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.05859386920928955, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.05758237838745117, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.03634541109204292, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.030338436365127563, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3289966724537035, + "total_bits": 377180416.0, + "err": 0.029952015727758408, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525755931712963, + "total_bits": 391106816.0, + "err": 0.027249766513705254, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.731774450231481, + "total_bits": 405688576.0, + "err": 0.026570336893200874, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.019363081082701683, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.01904720440506935, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.017997970804572105, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.01268566120415926, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.32.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.14253760874271393, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.13350358605384827, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.13009509444236755, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.11789321899414062, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.06696080416440964, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.0634387880563736, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.07559555023908615, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.06995486468076706, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.0679473727941513, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.06010802090167999, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.05697419494390488, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.038376517593860626, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.03340144082903862, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.03205699101090431, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.03173593431711197, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.01916263997554779, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.016382191330194473, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.016253989189863205, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.014971100725233555, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.014768672175705433, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.009933224879205227, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.009812982752919197, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.009481102228164673, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.006213667336851358, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.32.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.12258430570363998, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.11488756537437439, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.11181065440177917, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.10132797807455063, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.057631347328424454, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.05450136214494705, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.06550832092761993, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.0603969469666481, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.05845816060900688, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.05174984410405159, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.04923274368047714, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.033278632909059525, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.02887638285756111, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.027602504938840866, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.02729891799390316, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.016635335981845856, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.0141190430149436, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.013992849737405777, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.01291059423238039, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.012717878445982933, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.008640194311738014, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.008493002504110336, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.008221711963415146, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005394922103732824, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.32.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.2666378617286682, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.25013983249664307, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.24500074982643127, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.22223734855651855, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.12559859454631805, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.1196097657084465, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.14008848369121552, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.12929214537143707, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.12742038071155548, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.11286390572786331, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.10705766826868057, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.07119233906269073, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.06176822632551193, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.060052502900362015, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.05965561792254448, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.03551985323429108, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.030489567667245865, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.03036448545753956, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.027830712497234344, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.027567263692617416, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.018260737881064415, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.017809713259339333, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.017630131915211678, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.010942175053060055, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.32.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.21426883339881897, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.17426367104053497, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.1593007594347, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.12601444125175476, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.09573674947023392, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.08083982765674591, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.11800787597894669, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.10834047198295593, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.10253176838159561, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.06998652219772339, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.06576306372880936, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06047772988677025, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05193425342440605, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.04618930071592331, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.044751930981874466, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.03028193861246109, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.02413081005215645, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.023739073425531387, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.01908811368048191, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.0180355291813612, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.016201365739107132, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.015869805589318275, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.014380455017089844, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.01066843792796135, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.32.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.2117307484149933, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.19883225858211517, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.19480273127555847, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.17703409492969513, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.1000327542424202, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.09538166970014572, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.11106348782777786, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.10277841985225677, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.10145096480846405, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.09000498056411743, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.0855071023106575, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.056697066873311996, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.049262888729572296, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.04802324250340462, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.047719135880470276, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.028350355103611946, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.024791982024908066, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.02471255511045456, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.02276269905269146, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.022577693685889244, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.01496674120426178, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.015106049366295338, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.014561077579855919, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010195191018283367, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.32.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.2583141028881073, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.24263794720172882, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.237821564078331, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.2160738855600357, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.1218586191534996, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.11623341590166092, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.1352073699235916, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.1251426488161087, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.12357936054468155, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.10969015210866928, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.10408734530210495, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.06884082406759262, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.05983772501349449, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.05833492428064346, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.05798479542136192, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.03437347337603569, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.029747743159532547, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.029648929834365845, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.02722538262605667, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.026996750384569168, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.017848817631602287, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.017538242042064667, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.017358247190713882, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.011071354150772095, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.32.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1762188946759258, + "total_bits": 154030336.0, + "err": 0.260942280292511, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.23714786767959595, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.22739417850971222, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7294596354166667, + "total_bits": 193188096.0, + "err": 0.202619269490242, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.12072775512933731, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7294596354166667, + "total_bits": 263966976.0, + "err": 0.11125294864177704, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.14098338782787323, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.12990902364253998, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1762188946759258, + "total_bits": 224809216.0, + "err": 0.12389373034238815, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.525755931712963, + "total_bits": 249549056.0, + "err": 0.1054183840751648, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.09918234497308731, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.07172194123268127, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.062144458293914795, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.057983335107564926, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.05697143077850342, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.036135103553533554, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.03000338189303875, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3289966724537035, + "total_bits": 377180416.0, + "err": 0.029621878638863564, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525755931712963, + "total_bits": 391106816.0, + "err": 0.02692871354520321, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.731774450231481, + "total_bits": 405688576.0, + "err": 0.026260770857334137, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.01943686418235302, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.018812544643878937, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.018105152994394302, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.01250909548252821, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.33.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.13957881927490234, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.13073092699050903, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.1272958219051361, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.11532481014728546, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.06563456356525421, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.062091145664453506, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.07427609711885452, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.06868178397417068, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.06660216301679611, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.05889122188091278, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.055872201919555664, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.03773537278175354, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.03282567113637924, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.0314345508813858, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.031107794493436813, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.018857432529330254, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.016067098826169968, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.015933290123939514, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.014676575548946857, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.014459867961704731, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.009781042113900185, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.009639034047722816, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.009321202524006367, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.00610439945012331, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.33.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.12158478796482086, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.1138368472456932, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.11075715720653534, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.10036110877990723, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.05713912472128868, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.054035477340221405, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.06479880213737488, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.05988457053899765, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.057976268231868744, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.05127837881445885, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.048704784363508224, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.03293570876121521, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.02863107994198799, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.02738090790808201, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.027084317058324814, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.016462130472064018, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.014002149924635887, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.01388268731534481, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.012795642949640751, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.012605520896613598, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.008549113757908344, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.008420511148869991, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.00814061425626278, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005345809739083052, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.33.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.26354241371154785, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.24705514311790466, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.24181094765663147, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.21938180923461914, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.12415698170661926, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.1181798204779625, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.13853609561920166, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.1279280185699463, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.12594953179359436, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.11153700947761536, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.10569315403699875, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.07032032310962677, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.06110231950879097, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.059386223554611206, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.058981239795684814, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.035110365599393845, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.030122261494398117, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.029992956668138504, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.027483684942126274, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.027217837050557137, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.01800975762307644, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.01756257750093937, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.017381221055984497, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.010713271796703339, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.33.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.22568389773368835, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.18195828795433044, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.16633471846580505, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.14317630231380463, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.10203615576028824, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.08417408913373947, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.12427084892988205, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.11281151324510574, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.1077130138874054, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.07752255350351334, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.07226547598838806, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06360641866922379, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05456388369202614, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.049675170332193375, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.04848471283912659, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.03213793784379959, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.026761291548609734, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.02644699066877365, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.02217746526002884, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.02136237919330597, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.01807326264679432, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.018368640914559364, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.016615528613328934, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.013719463720917702, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.33.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.21251778304576874, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.19951879978179932, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.1954454481601715, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.17744328081607819, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.10045497864484787, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.0957169383764267, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.11180995404720306, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.10324129462242126, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.10187714546918869, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.0902939960360527, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.08569908887147903, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.05708928033709526, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.04951310157775879, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.04824039340019226, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.04793765768408775, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.02859009988605976, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.024954192340373993, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.024870622903108597, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.022902052849531174, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.022715523838996887, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.015224387869238853, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.015274685807526112, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.014816196635365486, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010392626747488976, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.33.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.259219765663147, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.24333226680755615, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.23847246170043945, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.21651627123355865, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.12237260490655899, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.11665802448987961, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.13611505925655365, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.12576256692409515, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.12413951009511948, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.11005862057209015, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.10440212488174438, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.06945519894361496, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.06019805371761322, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.0586710050702095, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.05830990895628929, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.03472225368022919, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.030044035986065865, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.02994084544479847, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.027505915611982346, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.02727879211306572, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.01831282302737236, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.0179214458912611, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.017819035798311234, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.01160278171300888, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.33.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1762188946759258, + "total_bits": 154030336.0, + "err": 0.2607234716415405, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.23625189065933228, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.22532694041728973, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7294596354166667, + "total_bits": 193188096.0, + "err": 0.2002032995223999, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.12055862694978714, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7294596354166667, + "total_bits": 263966976.0, + "err": 0.11031263321638107, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.14304345846176147, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.1314721256494522, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1762188946759258, + "total_bits": 224809216.0, + "err": 0.12389007210731506, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.525755931712963, + "total_bits": 249549056.0, + "err": 0.10494779050350189, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.09875252842903137, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.07273271679878235, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.06301098316907883, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.058001767843961716, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.056765612214803696, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.03675104305148125, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.03015713207423687, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3289966724537035, + "total_bits": 377180416.0, + "err": 0.029664065688848495, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525755931712963, + "total_bits": 391106816.0, + "err": 0.027054574340581894, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.731774450231481, + "total_bits": 405688576.0, + "err": 0.02624700777232647, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.019986936822533607, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.019267965108156204, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.018427036702632904, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.012976461090147495, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.34.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.14800576865673065, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.13850869238376617, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.13480257987976074, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.12202300876379013, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.06960449367761612, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.06581107527017593, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.07875797897577286, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.07288598269224167, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.0706467404961586, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.06235060468316078, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.05904500186443329, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.03995641693472862, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.034817714244127274, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.03334802761673927, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.032997243106365204, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.019967684522271156, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.017065849155187607, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.01692475564777851, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.015574561432003975, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.015346134081482887, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.010377473197877407, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.010268764570355415, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.009893874637782574, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.006532405037432909, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.34.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.12590861320495605, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.11782492697238922, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.11454320698976517, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.1036335676908493, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.05920395255088806, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.055915623903274536, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.06747344136238098, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.06227283552289009, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.06009450927376747, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.05306816101074219, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.05037910118699074, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.034275658428668976, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.029777616262435913, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.028383031487464905, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.028052913025021553, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.017149068415164948, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.014548834413290024, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.014409802854061127, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.013290951028466225, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.013074406422674656, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.00892694666981697, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.008804857730865479, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.00847066380083561, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005634002853184938, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.34.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.26944389939308167, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.25240230560302734, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.24690556526184082, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.22384116053581238, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.126972496509552, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.1207643449306488, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.1416982263326645, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.13085612654685974, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.12882737815380096, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.11386656761169434, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.10784469544887543, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.07203792780637741, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.06250566244125366, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.060728706419467926, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.06031200289726257, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.03593200445175171, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.030822334811091423, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.030693313106894493, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.028084121644496918, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.027811724692583084, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.018449459224939346, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.017992135137319565, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.017793698236346245, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.011010612361133099, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.34.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.20578911900520325, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.16916868090629578, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.15430007874965668, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.12667638063430786, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.09324788302183151, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.07829368859529495, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.11469514667987823, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.10595978796482086, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.09824571013450623, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.07045011222362518, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.06551026552915573, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.05863259360194206, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05079050734639168, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.045002859085798264, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.04354047402739525, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.029315318912267685, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.023589864373207092, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.02307039313018322, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.019143033772706985, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.01810338906943798, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.015477396547794342, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.01563667505979538, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.013601045124232769, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.010595016181468964, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.34.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.2143825739622116, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.20109201967716217, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.19697296619415283, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.17870685458183289, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.10132676362991333, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.09653982520103455, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.11269141733646393, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.10415420681238174, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.10277573019266129, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.09102606028318405, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.08632799983024597, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.05756146088242531, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.04997839778661728, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.048685286194086075, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.048370372503995895, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.02881217747926712, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.0251755528151989, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.025091135874390602, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.02308734692633152, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.02289252169430256, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.015290092676877975, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.015395542606711388, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.014868524856865406, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010445095598697662, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.34.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.2544366717338562, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.23873239755630493, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.23386551439762115, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.21222129464149475, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.12014228105545044, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.11447803676128387, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.13366135954856873, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.12346658110618591, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.12187381833791733, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.10792456567287445, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.10232330858707428, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.06814030557870865, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.0590926855802536, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.057578474283218384, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.05721749737858772, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.03408296778798103, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.029433418065309525, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.02933589369058609, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.026921121403574944, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.026691168546676636, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.01790592074394226, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.017488006502389908, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.01741006039083004, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.01121511496603489, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.34.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1762188946759258, + "total_bits": 154030336.0, + "err": 0.26080161333084106, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.23561763763427734, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.22438029944896698, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7294596354166667, + "total_bits": 193188096.0, + "err": 0.19920535385608673, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.12025774270296097, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7294596354166667, + "total_bits": 263966976.0, + "err": 0.10990443825721741, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.14247961342334747, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.13153450191020966, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1762188946759258, + "total_bits": 224809216.0, + "err": 0.12390810251235962, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.525755931712963, + "total_bits": 249549056.0, + "err": 0.10458265244960785, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.09829897433519363, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.07246734946966171, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.06289232522249222, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.057839054614305496, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.05659523978829384, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.03629457205533981, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.030011072754859924, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3289966724537035, + "total_bits": 377180416.0, + "err": 0.029522132128477097, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525755931712963, + "total_bits": 391106816.0, + "err": 0.026877256110310555, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.731774450231481, + "total_bits": 405688576.0, + "err": 0.026051731780171394, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.019186027348041534, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.0190911665558815, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.017531394958496094, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.012723155319690704, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.35.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.1456860452890396, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.13617493212223053, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.13253571093082428, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.11979207396507263, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.06855863332748413, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.0647793710231781, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.07760564982891083, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.07176610827445984, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.0695970430970192, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.06130602955818176, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.05803011730313301, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.03940538689494133, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.034309472888708115, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.03284997120499611, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.03249576687812805, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.019696002826094627, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.0167964156717062, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.016658013686537743, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.015309642069041729, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.015081267803907394, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.01022714376449585, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.010095017962157726, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.009743228554725647, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.00641837902367115, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.35.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.12355300784111023, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.1154937893152237, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.11226395517587662, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.10148033499717712, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.058082371950149536, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.05485258623957634, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.06616444885730743, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.0610603392124176, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.05897819250822067, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.05198810622096062, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.049302782863378525, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.03362847864627838, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.029176972806453705, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.027841167524456978, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.027515288442373276, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.01680127903819084, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.014267265796661377, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.014135759323835373, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.013018405064940453, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.012813543900847435, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.008774258196353912, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.00863503385335207, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.008332007564604282, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005529638379812241, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.35.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.2656119763851166, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.24867990612983704, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.24325662851333618, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.22027066349983215, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.12530910968780518, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.11912120878696442, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.139911487698555, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.12917698919773102, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.12717166543006897, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.11224028468132019, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.10626229643821716, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.07115001231431961, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.061712831258773804, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.059973496943712234, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.05953667685389519, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.035491663962602615, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.030445534735918045, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.030315212905406952, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.02772234007716179, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.027451293542981148, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.018244709819555283, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.017804689705371857, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.017605675384402275, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.01092496793717146, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.35.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.2068265676498413, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.17396856844425201, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.16340371966362, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.13555122911930084, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.09532458335161209, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.08269968628883362, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.11077167093753815, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.10188371688127518, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.09911353886127472, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.07253868132829666, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.06739666312932968, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.05673249065876007, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.048938266932964325, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.04594288766384125, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.04522385448217392, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.028347207233309746, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.024029452353715897, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.02384883537888527, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.01957823522388935, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.019056346267461777, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.015112542547285557, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.01538770366460085, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.014154031872749329, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.010690929368138313, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.35.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.21259108185768127, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.19932034611701965, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.19516700506210327, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.17695704102516174, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.10054807364940643, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.09570246934890747, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.11181596666574478, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.10341320186853409, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.1020229160785675, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.09020216763019562, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.08545850962400436, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.057025812566280365, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.049561139196157455, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.048256948590278625, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.04794304072856903, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.028477361425757408, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.024914035573601723, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.02482636272907257, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.02281837910413742, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.022618073970079422, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.015023262239992619, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.015186134725809097, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.014597027562558651, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010216481983661652, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.35.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.24899378418922424, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.23345457017421722, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.22860684990882874, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.20733997225761414, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.11765700578689575, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.11204861104488373, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.1308434158563614, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.12096704542636871, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.1193675845861435, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.10558100044727325, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.10004423558712006, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.06665077805519104, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.05782495066523552, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.05632951855659485, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.05596992000937462, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.03322724997997284, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.028735386207699776, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.028635062277317047, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.026232346892356873, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.02600865811109543, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.017298350110650063, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.016961554065346718, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.016794471070170403, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.01071450486779213, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.35.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1762188946759258, + "total_bits": 154030336.0, + "err": 0.2598722279071808, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.23370003700256348, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.22191016376018524, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7294596354166667, + "total_bits": 193188096.0, + "err": 0.19730786979198456, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.1196625828742981, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7294596354166667, + "total_bits": 263966976.0, + "err": 0.10887059569358826, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.14284752309322357, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.1315758228302002, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1762188946759258, + "total_bits": 224809216.0, + "err": 0.12363719940185547, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.525755931712963, + "total_bits": 249549056.0, + "err": 0.10386919230222702, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.09786569327116013, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.07263925671577454, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.06289612501859665, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.057520486414432526, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.05618727579712868, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.03631315752863884, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.02983638271689415, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3289966724537035, + "total_bits": 377180416.0, + "err": 0.029321901500225067, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525755931712963, + "total_bits": 391106816.0, + "err": 0.02668633498251438, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.731774450231481, + "total_bits": 405688576.0, + "err": 0.02580999955534935, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.01920435205101967, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.01903325505554676, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.017436467111110687, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.01262176875025034, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.36.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.14072754979133606, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.13144874572753906, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.1277136504650116, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.11531274020671844, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.06614167243242264, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.06238512322306633, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.07513537257909775, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.06953749805688858, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.06716226041316986, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.05910179391503334, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.05592288449406624, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.038170408457517624, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.0332561694085598, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.03171144425868988, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.03133999928832054, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.01909669302403927, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.016237927600741386, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.016087351366877556, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.01478987280279398, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.01455115620046854, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.009937161579728127, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.009810378775000572, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.00943457055836916, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.006261755712330341, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.36.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.11663758754730225, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.1089160144329071, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.10566753149032593, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.09543229639530182, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.05477086827158928, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.05157100409269333, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.06262091547250748, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.057856470346450806, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.055623237043619156, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.04895668476819992, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.046372197568416595, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.031775400042533875, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.027663933113217354, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.02626374550163746, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.02592414990067482, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.01589307375252247, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.013460096903145313, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.013321532867848873, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.01227261871099472, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.012056637555360794, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.008285787887871265, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.008182575926184654, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.007834547199308872, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005250041373074055, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.36.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.2638939619064331, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.24705888330936432, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.24157024919986725, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.21862253546714783, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.12444271147251129, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.11827076971530914, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.13907486200332642, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.12840189039707184, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.12631353735923767, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.11146567761898041, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.10541485249996185, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.07071048021316528, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.06134243682026863, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.059564538300037384, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.05913633108139038, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.03529713675379753, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.030284611508250237, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.03015492856502533, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.027580946683883667, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.027307502925395966, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.018161684274673462, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.017774520441889763, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.01750568300485611, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.011004392057657242, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.36.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.1983020305633545, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.16346371173858643, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.15155458450317383, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.12557266652584076, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.08973269164562225, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.07668188214302063, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.10785672068595886, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.09894552826881409, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.09528575837612152, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.06959334760904312, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.06277905404567719, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.05536438897252083, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.04766338691115379, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.04349270835518837, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.04247390851378441, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.027804814279079437, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.02307284250855446, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.022834433242678642, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.019203482195734978, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.01848849654197693, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.015089874155819416, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.015412911772727966, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.013763395138084888, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.011060239747166634, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.36.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.19748331606388092, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.18502125144004822, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.18110954761505127, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.1641671359539032, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.09339165687561035, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.08887313306331635, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.10390295833349228, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.09610826522111893, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.09478549659252167, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.08373121917247772, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.07928822934627533, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.05305137857794762, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.04609903320670128, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.04486706107854843, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.044574130326509476, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.02654990367591381, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.023202529177069664, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.02312072180211544, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.02124819904565811, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.021066537126898766, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.014052662067115307, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.014193490147590637, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.013631061650812626, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.009620497934520245, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.36.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.2318764477968216, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.21732860803604126, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.21277838945388794, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.1929045468568802, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.10955505073070526, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.10427965968847275, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.12176735699176788, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.11266551166772842, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.111173115670681, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.09828676283359528, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.09304900467395782, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.06206955015659332, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.05391300469636917, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.05248981714248657, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.052149221301078796, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.03100372478365898, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.026815449818968773, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.02671980857849121, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.024477345868945122, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.02426522970199585, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.016161900013685226, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.01589399017393589, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.015686826780438423, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010138092562556267, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.36.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1762188946759258, + "total_bits": 154030336.0, + "err": 0.2509056329727173, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.22533045709133148, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.21442750096321106, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7294596354166667, + "total_bits": 193188096.0, + "err": 0.19050383567810059, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.1156507357954979, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7294596354166667, + "total_bits": 263966976.0, + "err": 0.10534292459487915, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.13769790530204773, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.12638701498508453, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1762188946759258, + "total_bits": 224809216.0, + "err": 0.119561567902565, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.525755931712963, + "total_bits": 249549056.0, + "err": 0.1002376452088356, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.09435372799634933, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.07008498162031174, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.06060028448700905, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.05574686825275421, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.054564427584409714, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.03522950038313866, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.029176194220781326, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3289966724537035, + "total_bits": 377180416.0, + "err": 0.028736859560012817, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525755931712963, + "total_bits": 391106816.0, + "err": 0.026133012026548386, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.731774450231481, + "total_bits": 405688576.0, + "err": 0.02534446492791176, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.01892843283712864, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.01886998489499092, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.01733170822262764, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.01299352664500475, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.37.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.1283239722251892, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.11976051330566406, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.11632312834262848, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.10493409633636475, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.06032723933458328, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.056838519871234894, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.06885109096765518, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.06356868892908096, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.06126976013183594, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.05385749787092209, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.05105637013912201, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.03497304394841194, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.030418016016483307, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.028953813016414642, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.02859780378639698, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.01751730963587761, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.014912191778421402, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.01476381253451109, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.013600188307464123, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.013376090675592422, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.009190193377435207, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.009149671532213688, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.008712138049304485, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.00601379107683897, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.37.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.10936964303255081, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.10201779007911682, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.09885887801647186, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.08920135349035263, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.051335614174604416, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.04826195910573006, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.05897423252463341, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.0544513538479805, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.05215573310852051, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.04585975781083107, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.04344912990927696, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.02996859885752201, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.026056883856654167, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.024644488468766212, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.024305345490574837, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.015005253255367279, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.012677649967372417, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.012531541287899017, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.011564367450773716, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.011348189786076546, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.007875834591686726, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.0077864560298621655, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.007413514889776707, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005071079358458519, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.37.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.2538524568080902, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.23732006549835205, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.23194606602191925, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.20972946286201477, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.1197405532002449, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.11366229504346848, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.13400009274482727, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.12367390841245651, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.12154557555913925, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.10708333551883698, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.10116922110319138, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06816557794809341, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.0591023713350296, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.05730888620018959, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.05687544867396355, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.034019775688648224, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.029172684997320175, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.029039228335022926, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.026544388383626938, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.026268647983670235, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.0175604410469532, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.01718786731362343, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.016909003257751465, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.010713638737797737, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.37.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.115054652094841, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.09493646025657654, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.08834804594516754, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.0752960592508316, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.051509372889995575, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.04457775503396988, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.06188022345304489, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.05645947903394699, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.05472914129495621, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.04041333124041557, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.03726692125201225, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.03183327615261078, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.027821650728583336, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.025604717433452606, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.02508322335779667, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.016216417774558067, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.01466691680252552, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.014553952030837536, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.01277826726436615, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.01244279183447361, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.00942198932170868, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.010928211733698845, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.008773069828748703, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.009066842496395111, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.37.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.1872493475675583, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.1754310429096222, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.1716984063386917, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.15563109517097473, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.08855569362640381, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.08423905819654465, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.09853851795196533, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.09114690124988556, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.0898687019944191, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.07939605414867401, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.07518406957387924, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.0503690242767334, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.04376349598169327, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.0425758957862854, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.0422917976975441, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.025198115035891533, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.022091465070843697, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.022010160610079765, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.020252930000424385, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.020070821046829224, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.013420464470982552, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.01364068128168583, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.013028038665652275, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.009388787671923637, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.37.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.2178260087966919, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.20411138236522675, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.19981630146503448, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.1811056286096573, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.10295366495847702, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.09795323014259338, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.11463364213705063, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.10592256486415863, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.10448053479194641, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.0922817662358284, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.08740726858377457, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.05853964388370514, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.05073539912700653, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.049373116344213486, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.04905218631029129, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.029250431805849075, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.02533581107854843, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.02524128369987011, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.023149680346250534, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.022948220372200012, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.015440486371517181, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.015201728790998459, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.014995983801782131, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.009937960654497147, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.37.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1762188946759258, + "total_bits": 154030336.0, + "err": 0.23468336462974548, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.20866802334785461, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.19778886437416077, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7294596354166667, + "total_bits": 193188096.0, + "err": 0.1759638488292694, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.1079087182879448, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7294596354166667, + "total_bits": 263966976.0, + "err": 0.09740322083234787, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.12947122752666473, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.11852313578128815, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1762188946759258, + "total_bits": 224809216.0, + "err": 0.11195002496242523, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.525755931712963, + "total_bits": 249549056.0, + "err": 0.09293142706155777, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.08770168572664261, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.06614775955677032, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.057059504091739655, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.05224090442061424, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.05105983838438988, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.03340698406100273, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.027727697044610977, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3289966724537035, + "total_bits": 377180416.0, + "err": 0.027313973754644394, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525755931712963, + "total_bits": 391106816.0, + "err": 0.02484157681465149, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.731774450231481, + "total_bits": 405688576.0, + "err": 0.024073714390397072, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.018365968018770218, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.018481072038412094, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.01686285249888897, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.013300688937306404, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.38.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.12741319835186005, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.11902651935815811, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.11549355089664459, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.10434363782405853, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.059909723699092865, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.05646860599517822, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.0686575248837471, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.06321609020233154, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.06083964556455612, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.05356733500957489, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.05085968226194382, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.03489409387111664, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.03025839664041996, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.028778791427612305, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.028424713760614395, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.017498821020126343, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.014810455031692982, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.014658669009804726, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.013512532226741314, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.013285360299050808, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.009197363629937172, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.009072144515812397, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.00872624758630991, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.00592708820477128, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.38.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.10637273639440536, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.09935248643159866, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.09621839225292206, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.0868639275431633, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.04999585077166557, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.046989213675260544, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.057499293237924576, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.05310776084661484, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.05075961351394653, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.04469093680381775, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.042363449931144714, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.029227105900645256, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.02541002631187439, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.024000387638807297, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.023659247905015945, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.014641103334724903, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.012356755323708057, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.012208670377731323, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.011275173164904118, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.011058936826884747, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.007700261194258928, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.0076116942800581455, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.007242769002914429, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.004974815994501114, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.38.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.25758692622184753, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.2409701943397522, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.23574481904506683, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.21325115859508514, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.12158041447401047, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.11553439497947693, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.13580191135406494, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.12535157799720764, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.1234029158949852, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.10875383019447327, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.10282554477453232, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06912943720817566, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05992142856121063, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.058220215141773224, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.057809099555015564, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.034503549337387085, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.029635872691869736, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.029501499608159065, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.026952315121889114, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.026689574122428894, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.017770899459719658, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.01742793247103691, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.017141098156571388, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.01085445936769247, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.38.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.14113052189350128, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.1145516112446785, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.10621962696313858, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.08651363104581833, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.06426291912794113, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.054442211985588074, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.07569321990013123, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.06903427839279175, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.06764860451221466, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.04734933003783226, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.04346741363406181, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.03908909484744072, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.0337645560503006, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.03164960443973541, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.031117822974920273, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.01997358724474907, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.017573753371834755, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.017491072416305542, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.014557026326656342, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.014216786250472069, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.011550153605639935, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.012486027553677559, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.010924575850367546, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.010021872818470001, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.38.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.1500970870256424, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.14059117436408997, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.13764001429080963, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.1247745007276535, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.07132183015346527, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.06786604970693588, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.07938063144683838, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.07338595390319824, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.07236466556787491, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.06403990834951401, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.06075957417488098, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.041059911251068115, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.03590856119990349, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.03497494384646416, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.034755971282720566, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.02076113596558571, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.019289853051304817, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.019226955249905586, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.01794896088540554, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.017818091437220573, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.011837770231068134, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.013376823626458645, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.011554807424545288, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010801205411553383, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.38.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.16820751130580902, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.1575963944196701, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.15424814820289612, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.13984902203083038, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.07994294911623001, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.07611309736967087, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.08927832543849945, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.08225001394748688, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.08111303299665451, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.07177118957042694, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.06821952760219574, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.04606484994292259, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.04022872820496559, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.039199039340019226, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.038961123675107956, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.023375870659947395, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.021608781069517136, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.021538304165005684, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.020107751712203026, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.019962549209594727, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.013453999534249306, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.01498076505959034, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.013132072985172272, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.012076462619006634, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.38.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1762188946759258, + "total_bits": 154030336.0, + "err": 0.1803094744682312, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.15804535150527954, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.149444118142128, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7294596354166667, + "total_bits": 193188096.0, + "err": 0.1326626092195511, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.08260305970907211, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7294596354166667, + "total_bits": 263966976.0, + "err": 0.07391922920942307, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.1007566824555397, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.09077395498752594, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1762188946759258, + "total_bits": 224809216.0, + "err": 0.08600421994924545, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.525755931712963, + "total_bits": 249549056.0, + "err": 0.07068555057048798, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.06699793040752411, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.05170339718461037, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.044422298669815063, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.040601905435323715, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.039702169597148895, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.02664061263203621, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.022580858319997787, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3289966724537035, + "total_bits": 377180416.0, + "err": 0.022240402176976204, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525755931712963, + "total_bits": 391106816.0, + "err": 0.020401451736688614, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.731774450231481, + "total_bits": 405688576.0, + "err": 0.01978539302945137, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.015717308968305588, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.01617339625954628, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.014544585719704628, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.012767083011567593, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.39.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.11449676752090454, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.1071024239063263, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.10384423285722733, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.09392326325178146, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.05377450957894325, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.05064533278346062, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.06216039881110191, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.0569482259452343, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.05459601804614067, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.0481642484664917, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.045742280781269073, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.03162467107176781, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.02724582329392433, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.02581809088587761, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.025477658957242966, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.0158434696495533, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.013303905725479126, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.013159278780221939, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.012163136154413223, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.011946849524974823, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.008343291468918324, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.008192078210413456, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.007882912643253803, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.0053788102231919765, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.39.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.09060078859329224, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.08467929810285568, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.08191218227148056, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.07395492494106293, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.042483098804950714, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.0398794449865818, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.04923050105571747, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.04530775919556618, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.04312834143638611, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.03803214058279991, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.036060892045497894, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.025038760155439377, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.02169751562178135, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.020406052470207214, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.020094772800803185, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.012557437643408775, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.01054899301379919, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.010410299524664879, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.009648958221077919, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.009450857527554035, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.006642072461545467, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.00656916992738843, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.006219842471182346, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.004359330516308546, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.39.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.24309584498405457, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.2277287095785141, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.2227751910686493, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.20155756175518036, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.11477702111005783, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.10899194329977036, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.13000614941120148, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.11847883462905884, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.11646796017885208, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.1028614193201065, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.09775634855031967, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06648389995098114, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05699130892753601, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.055233389139175415, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.05477217584848404, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.033657465130090714, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.028425803408026695, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.02827160805463791, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.02599276229739189, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.02571702003479004, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.017640814185142517, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.017147991806268692, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.016983961686491966, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.011191735044121742, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.39.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.12944145500659943, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.09494597464799881, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.0831584483385086, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.0725867822766304, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.056429050862789154, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.04361985996365547, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.07582315057516098, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.06413707882165909, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.06120150908827782, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.041684504598379135, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.04044908285140991, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.03867419809103012, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.03324328362941742, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.02985532395541668, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.029006725177168846, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.020915677770972252, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.01931452937424183, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.0191477183252573, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.017171723768115044, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.016716545447707176, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.013559016399085522, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.01619364693760872, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.012651094235479832, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.014614530839025974, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.39.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.1366167962551117, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.12785756587982178, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.1251058578491211, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.11333262920379639, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.06438238173723221, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.06122063845396042, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.07183001935482025, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.06631357222795486, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.06533738970756531, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.05769690126180649, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.054675888270139694, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.03661682456731796, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.03178950771689415, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.030919305980205536, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.03070669062435627, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.01833227649331093, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.015997515991330147, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.015937834978103638, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.01464657299220562, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.014519158750772476, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.00967350136488676, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.009805293753743172, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.009379303082823753, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.0066672079265117645, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.39.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.1352883130311966, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.12663419544696808, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.1238933801651001, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.11224804073572159, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.0641622319817543, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.06103222072124481, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.07174362242221832, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.06610909849405289, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.06511332839727402, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.05757446959614754, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.05471492558717728, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.03701363131403923, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.03213542699813843, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.031274016946554184, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.031070148572325706, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.018689017742872238, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.01693304441869259, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.01687568798661232, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.015682142227888107, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.015560214407742023, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.010494113899767399, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.011387230828404427, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.010227606631815434, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.008870186284184456, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.39.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1762188946759258, + "total_bits": 154030336.0, + "err": 0.09864497929811478, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.08603864908218384, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.08020181953907013, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7294596354166667, + "total_bits": 193188096.0, + "err": 0.07101578265428543, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.045254725962877274, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7294596354166667, + "total_bits": 263966976.0, + "err": 0.03983591869473457, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.058001503348350525, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.051144812256097794, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1762188946759258, + "total_bits": 224809216.0, + "err": 0.04709978029131889, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.525755931712963, + "total_bits": 249549056.0, + "err": 0.038624703884124756, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.03691858425736427, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.029862860217690468, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.025453835725784302, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.02280414290726185, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.022148702293634415, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.015506167896091938, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.013478174805641174, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3289966724537035, + "total_bits": 377180416.0, + "err": 0.013239494524896145, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.525755931712963, + "total_bits": 391106816.0, + "err": 0.012377982959151268, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.731774450231481, + "total_bits": 405688576.0, + "err": 0.011996288783848286, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.009300390258431435, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.010492220520973206, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.008459619246423244, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.008914678357541561, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + } + ], + "last_module_idx": 82, + "base_perplexity": 6.491815677267153 +} \ No newline at end of file